From 2715491e5da16e2a6b6786303e52decbdba4ff0f Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 8 Feb 2025 03:13:39 +0000 Subject: [PATCH 1/4] GH-129835: Yield path with trailing slash from `ReadablePath.glob('')` In the private pathlib ABCs, make `ReadablePath.glob('')` yield a path with a trailing slash (if it yields anything at all). As a result, `glob()` works similarly to `joinpath()` when given a non-magic pattern. In the globbing implementation, we preemptively add trailing slashes to intermediate paths if there are pattern parts remaining; this removes the need to check for existing trailing slashes (in the removed `add_slash()` method) at subsequent steps. --- Lib/glob.py | 60 +++++++++-------------- Lib/pathlib/_abc.py | 2 +- Lib/pathlib/_local.py | 2 +- Lib/test/test_pathlib/test_pathlib_abc.py | 2 +- 4 files changed, 27 insertions(+), 39 deletions(-) diff --git a/Lib/glob.py b/Lib/glob.py index a834ea7f7ce556..6d341a7dd2c985 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -352,12 +352,6 @@ def scandir(path): """ raise NotImplementedError - @staticmethod - def add_slash(path): - """Returns a path with a trailing slash added. - """ - raise NotImplementedError - @staticmethod def concat_path(path, text): """Implements path concatenation. @@ -389,10 +383,12 @@ def selector(self, parts): def special_selector(self, part, parts): """Returns a function that selects special children of the given path. """ + if parts: + part += self.sep select_next = self.selector(parts) def select_special(path, exists=False): - path = self.concat_path(self.add_slash(path), part) + path = self.concat_path(path, part) return select_next(path, exists) return select_special @@ -402,14 +398,16 @@ def literal_selector(self, part, parts): # Optimization: consume and join any subsequent literal parts here, # rather than leaving them for the next selector. This reduces the - # number of string concatenation operations and calls to add_slash(). + # number of string concatenation operations. while parts and magic_check.search(parts[-1]) is None: part += self.sep + parts.pop() + if parts: + part += self.sep select_next = self.selector(parts) def select_literal(path, exists=False): - path = self.concat_path(self.add_slash(path), part) + path = self.concat_path(path, part) return select_next(path, exists=False) return select_literal @@ -437,7 +435,7 @@ def select_wildcard(path, exists=False): continue except OSError: continue - if dir_only: + entry_path = self.concat_path(entry_path, self.sep) yield from select_next(entry_path, exists=True) else: yield entry_path @@ -461,13 +459,14 @@ def recursive_selector(self, part, parts): if follow_symlinks: while parts and parts[-1] not in _special_parts: part += self.sep + parts.pop() + if parts: + part += self.sep match = None if part == '**' else self.compile(part) dir_only = bool(parts) select_next = self.selector(parts) def select_recursive(path, exists=False): - path = self.add_slash(path) match_pos = len(str(path)) if match is None or match(str(path), match_pos): yield from select_next(path, exists) @@ -490,16 +489,20 @@ def select_recursive_step(stack, match_pos): except OSError: pass - if is_dir or not dir_only: - if match is None or match(str(entry_path), match_pos): - if dir_only: - yield from select_next(entry_path, exists=True) - else: - # Optimization: directly yield the path if this is - # last pattern part. - yield entry_path - if is_dir: - stack.append(entry_path) + if dir_only: + if not is_dir: + continue + entry_path = self.concat_path(entry_path, self.sep) + + if match is None or match(str(entry_path), match_pos): + if dir_only: + yield from select_next(entry_path, exists=True) + else: + # Optimization: directly yield the path if this is + # last pattern part. + yield entry_path + if is_dir: + stack.append(entry_path) return select_recursive @@ -528,27 +531,12 @@ def scandir(path): entries = list(scandir_it) return ((entry, entry.name, entry.path) for entry in entries) - if os.name == 'nt': - @staticmethod - def add_slash(pathname): - tail = os.path.splitroot(pathname)[2] - if not tail or tail[-1] in '\\/': - return pathname - return f'{pathname}\\' - else: - @staticmethod - def add_slash(pathname): - if not pathname or pathname[-1] == '/': - return pathname - return f'{pathname}/' - class _PathGlobber(_GlobberBase): """Provides shell-style pattern matching and globbing for pathlib paths. """ lexists = operator.methodcaller('exists', follow_symlinks=False) - add_slash = operator.methodcaller('joinpath', '') @staticmethod def scandir(path): diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index d20f04fc5b6dc3..65d91e4d67b463 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -460,7 +460,7 @@ def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=True): recursive = True if recurse_symlinks else _no_recurse_symlinks globber = _PathGlobber(self.parser.sep, case_sensitive, case_pedantic, recursive) select = globber.selector(parts) - return select(self) + return select(self.joinpath('')) def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=True): """Recursively yield all existing files (of any kind, including diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 07d361d7b1352c..ad35b705808d68 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -959,7 +959,7 @@ def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=False): globber = _StringGlobber(self.parser.sep, case_sensitive, case_pedantic, recursive) select = globber.selector(parts[::-1]) root = str(self) - paths = select(root) + paths = select(root + self.parser.sep) # Normalize results if root == '.': diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 696874273a21fd..836d8387bdc433 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1125,7 +1125,7 @@ def test_glob_windows(self): def test_glob_empty_pattern(self): P = self.cls p = P(self.base) - self.assertEqual(list(p.glob("")), [p]) + self.assertEqual(list(p.glob("")), [p.joinpath("")]) def test_glob_case_sensitive(self): P = self.cls From 266c9959941e36e7a9eb8ee4babf09f177eb032b Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 8 Feb 2025 03:27:40 +0000 Subject: [PATCH 2/4] Use join() rather than appending sep --- Lib/pathlib/_local.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index ad35b705808d68..6cdcd448991c8c 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -959,7 +959,7 @@ def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=False): globber = _StringGlobber(self.parser.sep, case_sensitive, case_pedantic, recursive) select = globber.selector(parts[::-1]) root = str(self) - paths = select(root + self.parser.sep) + paths = select(self.parser.join(root, '')) # Normalize results if root == '.': From 3d3a82a66d3fae79f386b2bbbb45af80def51a8a Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 8 Feb 2025 03:53:39 +0000 Subject: [PATCH 3/4] Ignore trailing slash when matching terminal `**` sequence --- Lib/glob.py | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/Lib/glob.py b/Lib/glob.py index 6d341a7dd2c985..39a152a20ad1c4 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -459,9 +459,6 @@ def recursive_selector(self, part, parts): if follow_symlinks: while parts and parts[-1] not in _special_parts: part += self.sep + parts.pop() - if parts: - part += self.sep - match = None if part == '**' else self.compile(part) dir_only = bool(parts) select_next = self.selector(parts) @@ -489,20 +486,19 @@ def select_recursive_step(stack, match_pos): except OSError: pass - if dir_only: - if not is_dir: - continue - entry_path = self.concat_path(entry_path, self.sep) - - if match is None or match(str(entry_path), match_pos): + if is_dir or not dir_only: + entry_path_str = str(entry_path) if dir_only: - yield from select_next(entry_path, exists=True) - else: - # Optimization: directly yield the path if this is - # last pattern part. - yield entry_path - if is_dir: - stack.append(entry_path) + entry_path = self.concat_path(entry_path, self.sep) + if match is None or match(entry_path_str, match_pos): + if dir_only: + yield from select_next(entry_path, exists=True) + else: + # Optimization: directly yield the path if this is + # last pattern part. + yield entry_path + if is_dir: + stack.append(entry_path) return select_recursive From 8076f247b6d8404adcaa201d661396a44822a78a Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 8 Feb 2025 03:56:44 +0000 Subject: [PATCH 4/4] Spacing --- Lib/glob.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/glob.py b/Lib/glob.py index 39a152a20ad1c4..cd8859e63318f3 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -459,6 +459,7 @@ def recursive_selector(self, part, parts): if follow_symlinks: while parts and parts[-1] not in _special_parts: part += self.sep + parts.pop() + match = None if part == '**' else self.compile(part) dir_only = bool(parts) select_next = self.selector(parts)