diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 4847ac24c77513..14118127835bbe 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -855,7 +855,7 @@ call fails (for example because the path doesn't exist). .. versionadded:: 3.5 -.. method:: Path.glob(pattern) +.. method:: Path.glob(pattern, *, case_sensitive=None) Glob the given relative *pattern* in the directory represented by this path, yielding all matching files (of any kind):: @@ -876,6 +876,11 @@ call fails (for example because the path doesn't exist). PosixPath('setup.py'), PosixPath('test_pathlib.py')] + By default, or when the *case_sensitive* keyword-only argument is set to + ``None``, this method matches paths using platform-specific casing rules: + typically, case-sensitive on POSIX, and case-insensitive on Windows. + Set *case_sensitive* to ``True`` or ``False`` to override this behaviour. + .. note:: Using the "``**``" pattern in large directory trees may consume an inordinate amount of time. @@ -886,6 +891,9 @@ call fails (for example because the path doesn't exist). Return only directories if *pattern* ends with a pathname components separator (:data:`~os.sep` or :data:`~os.altsep`). + .. versionadded:: 3.12 + The *case_sensitive* argument. + .. method:: Path.group() Return the name of the group owning the file. :exc:`KeyError` is raised @@ -1271,7 +1279,7 @@ call fails (for example because the path doesn't exist). .. versionadded:: 3.6 The *strict* argument (pre-3.6 behavior is strict). -.. method:: Path.rglob(pattern) +.. method:: Path.rglob(pattern, *, case_sensitive=None) Glob the given relative *pattern* recursively. This is like calling :func:`Path.glob` with "``**/``" added in front of the *pattern*, where @@ -1284,12 +1292,20 @@ call fails (for example because the path doesn't exist). PosixPath('setup.py'), PosixPath('test_pathlib.py')] + By default, or when the *case_sensitive* keyword-only argument is set to + ``None``, this method matches paths using platform-specific casing rules: + typically, case-sensitive on POSIX, and case-insensitive on Windows. + Set *case_sensitive* to ``True`` or ``False`` to override this behaviour. + .. audit-event:: pathlib.Path.rglob self,pattern pathlib.Path.rglob .. versionchanged:: 3.11 Return only directories if *pattern* ends with a pathname components separator (:data:`~os.sep` or :data:`~os.altsep`). + .. versionadded:: 3.12 + The *case_sensitive* argument. + .. method:: Path.rmdir() Remove this directory. The directory must be empty. diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 8a1651c23d7f4d..f32e1e2d822834 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -62,7 +62,7 @@ def _is_case_sensitive(flavour): # @functools.lru_cache() -def _make_selector(pattern_parts, flavour): +def _make_selector(pattern_parts, flavour, case_sensitive): pat = pattern_parts[0] child_parts = pattern_parts[1:] if not pat: @@ -75,17 +75,17 @@ def _make_selector(pattern_parts, flavour): raise ValueError("Invalid pattern: '**' can only be an entire path component") else: cls = _WildcardSelector - return cls(pat, child_parts, flavour) + return cls(pat, child_parts, flavour, case_sensitive) class _Selector: """A selector matches a specific glob pattern part against the children of a given path.""" - def __init__(self, child_parts, flavour): + def __init__(self, child_parts, flavour, case_sensitive): self.child_parts = child_parts if child_parts: - self.successor = _make_selector(child_parts, flavour) + self.successor = _make_selector(child_parts, flavour, case_sensitive) self.dironly = True else: self.successor = _TerminatingSelector() @@ -108,8 +108,9 @@ def _select_from(self, parent_path, scandir): class _ParentSelector(_Selector): - def __init__(self, name, child_parts, flavour): - _Selector.__init__(self, child_parts, flavour) + + def __init__(self, name, child_parts, flavour, case_sensitive): + _Selector.__init__(self, child_parts, flavour, case_sensitive) def _select_from(self, parent_path, scandir): path = parent_path._make_child_relpath('..') @@ -119,10 +120,13 @@ def _select_from(self, parent_path, scandir): class _WildcardSelector(_Selector): - def __init__(self, pat, child_parts, flavour): - flags = re.NOFLAG if _is_case_sensitive(flavour) else re.IGNORECASE + def __init__(self, pat, child_parts, flavour, case_sensitive): + _Selector.__init__(self, child_parts, flavour, case_sensitive) + if case_sensitive is None: + # TODO: evaluate case-sensitivity of each directory in _select_from() + case_sensitive = _is_case_sensitive(flavour) + flags = re.NOFLAG if case_sensitive else re.IGNORECASE self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch - _Selector.__init__(self, child_parts, flavour) def _select_from(self, parent_path, scandir): try: @@ -153,8 +157,8 @@ def _select_from(self, parent_path, scandir): class _RecursiveWildcardSelector(_Selector): - def __init__(self, pat, child_parts, flavour): - _Selector.__init__(self, child_parts, flavour) + def __init__(self, pat, child_parts, flavour, case_sensitive): + _Selector.__init__(self, child_parts, flavour, case_sensitive) def _iterate_directories(self, parent_path, scandir): yield parent_path @@ -819,7 +823,7 @@ def _scandir(self): # includes scandir(), which is used to implement glob(). return os.scandir(self) - def glob(self, pattern): + def glob(self, pattern, *, case_sensitive=None): """Iterate over this subtree and yield all existing files (of any kind, including directories) matching the given relative pattern. """ @@ -831,11 +835,11 @@ def glob(self, pattern): raise NotImplementedError("Non-relative patterns are unsupported") if pattern[-1] in (self._flavour.sep, self._flavour.altsep): pattern_parts.append('') - selector = _make_selector(tuple(pattern_parts), self._flavour) + selector = _make_selector(tuple(pattern_parts), self._flavour, case_sensitive) for p in selector.select_from(self): yield p - def rglob(self, pattern): + def rglob(self, pattern, *, case_sensitive=None): """Recursively yield all existing files (of any kind, including directories) matching the given relative pattern, anywhere in this subtree. @@ -846,7 +850,7 @@ def rglob(self, pattern): raise NotImplementedError("Non-relative patterns are unsupported") if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep): pattern_parts.append('') - selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour) + selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour, case_sensitive) for p in selector.select_from(self): yield p diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 424bb92a87d112..a932e03df4236d 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1816,6 +1816,18 @@ def _check(glob, expected): else: _check(p.glob("*/"), ["dirA", "dirB", "dirC", "dirE", "linkB"]) + def test_glob_case_sensitive(self): + P = self.cls + def _check(path, pattern, case_sensitive, expected): + actual = {str(q) for q in path.glob(pattern, case_sensitive=case_sensitive)} + expected = {str(P(BASE, q)) for q in expected} + self.assertEqual(actual, expected) + path = P(BASE) + _check(path, "DIRB/FILE*", True, []) + _check(path, "DIRB/FILE*", False, ["dirB/fileB"]) + _check(path, "dirb/file*", True, []) + _check(path, "dirb/file*", False, ["dirB/fileB"]) + def test_rglob_common(self): def _check(glob, expected): self.assertEqual(set(glob), { P(BASE, q) for q in expected }) diff --git a/Misc/NEWS.d/next/Library/2023-03-15-00-37-43.gh-issue-81079.heTAod.rst b/Misc/NEWS.d/next/Library/2023-03-15-00-37-43.gh-issue-81079.heTAod.rst new file mode 100644 index 00000000000000..ef5690533985d5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-03-15-00-37-43.gh-issue-81079.heTAod.rst @@ -0,0 +1,2 @@ +Add *case_sensitive* keyword-only argument to :meth:`pathlib.Path.glob` and +:meth:`~pathlib.Path.rglob`.