From 376068312414cfe323e8adf1d51e080b14bdf04a Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 10 Mar 2024 11:42:52 +0100 Subject: [PATCH 1/5] glob.translate: match empty path parts with "**" when recursive=True --- Lib/glob.py | 8 ++++---- Lib/test/test_glob.py | 15 +++++++++++---- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/Lib/glob.py b/Lib/glob.py index 343be78a73b20a..926ab24ada2663 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -276,13 +276,13 @@ def translate(pat, *, recursive=False, include_hidden=False, seps=None): not_sep = f'[^{escaped_seps}]' if include_hidden: one_last_segment = f'{not_sep}+' - one_segment = f'{one_last_segment}{any_sep}' - any_segments = f'(?:.+{any_sep})?' + one_segment = f'(?:{any_sep}|{one_last_segment}{any_sep})' + any_segments = f'{one_segment}*' any_last_segments = '.*' else: one_last_segment = f'[^{escaped_seps}.]{not_sep}*' - one_segment = f'{one_last_segment}{any_sep}' - any_segments = f'(?:{one_segment})*' + one_segment = f'(?:{any_sep}|{one_last_segment}{any_sep})' + any_segments = f'{one_segment}*' any_last_segments = f'{any_segments}(?:{one_last_segment})?' 
results = [] diff --git a/Lib/test/test_glob.py b/Lib/test/test_glob.py index 8b2ea8f89f5daf..fb358e9d4c39d4 100644 --- a/Lib/test/test_glob.py +++ b/Lib/test/test_glob.py @@ -372,6 +372,7 @@ def test_translate_matching(self): self.assertIsNotNone(match('foo')) self.assertIsNone(match('.foo')) self.assertIsNotNone(match(os.path.join('foo', 'bar'))) + self.assertIsNotNone(match('foo//bar')) self.assertIsNone(match(os.path.join('foo', '.bar'))) self.assertIsNone(match(os.path.join('.foo', 'bar'))) self.assertIsNone(match(os.path.join('.foo', '.bar'))) @@ -380,6 +381,8 @@ def test_translate_matching(self): self.assertIsNone(match(os.path.join('foo', '.bar'))) self.assertIsNone(match(os.path.join('.foo', 'bar'))) self.assertIsNone(match(os.path.join('.foo', '.bar'))) + self.assertIsNotNone(match('foo//baz')) + self.assertIsNotNone(match('foo/bar//baz')) match = re.compile(glob.translate('*/**', recursive=True)).match self.assertIsNotNone(match(os.path.join('foo', 'bar'))) self.assertIsNone(match(os.path.join('foo', '.bar'))) @@ -393,6 +396,10 @@ def test_translate_matching(self): self.assertIsNone(match(os.path.join('foo', '.bar'))) self.assertIsNotNone(match(os.path.join('foo', 'bar.txt'))) self.assertIsNone(match(os.path.join('foo', '.bar.txt'))) + match = re.compile(glob.translate('**/*', recursive=True, include_hidden=True)).match + self.assertIsNotNone(match('foo//baz')) + self.assertIsNotNone(match('.foo//baz')) + self.assertIsNotNone(match('.foo/bar//baz')) def test_translate(self): def fn(pat): @@ -412,7 +419,7 @@ def fn(pat): self.assertEqual(fn('a**'), r'(?s:a[^/]*)\Z') self.assertEqual(fn('**b'), r'(?s:(?!\.)[^/]*b)\Z') self.assertEqual(fn('/**/*/*.*/**'), - r'(?s:/(?!\.)[^/]*/[^/.][^/]*/(?!\.)[^/]*\.[^/]*/(?!\.)[^/]*)\Z') + r'(?s:/(?!\.)[^/]*/(?:/|[^/.][^/]*/)(?!\.)[^/]*\.[^/]*/(?!\.)[^/]*)\Z') def test_translate_include_hidden(self): def fn(pat): @@ -431,7 +438,7 @@ def fn(pat): self.assertEqual(fn('***'), r'(?s:[^/]*)\Z') self.assertEqual(fn('a**'), 
r'(?s:a[^/]*)\Z') self.assertEqual(fn('**b'), r'(?s:[^/]*b)\Z') - self.assertEqual(fn('/**/*/*.*/**'), r'(?s:/[^/]*/[^/]+/[^/]*\.[^/]*/[^/]*)\Z') + self.assertEqual(fn('/**/*/*.*/**'), r'(?s:/[^/]*/(?:/|[^/]+/)[^/]*\.[^/]*/[^/]*)\Z') def test_translate_recursive(self): def fn(pat): @@ -443,13 +450,13 @@ def fn(pat): self.assertRaises(ValueError, fn, '***') self.assertRaises(ValueError, fn, 'a**') self.assertRaises(ValueError, fn, '**b') - self.assertEqual(fn('/**/*/*.*/**'), r'(?s:/(?:.+/)?[^/]+/[^/]*\.[^/]*/.*)\Z') + self.assertEqual(fn('/**/*/*.*/**'), r'(?s:/(?:/|[^/]+/)*(?:/|[^/]+/)[^/]*\.[^/]*/.*)\Z') def test_translate_seps(self): def fn(pat): return glob.translate(pat, recursive=True, include_hidden=True, seps=['/', '\\']) self.assertEqual(fn('foo/bar\\baz'), r'(?s:foo[/\\]bar[/\\]baz)\Z') - self.assertEqual(fn('**/*'), r'(?s:(?:.+[/\\])?[^/\\]+)\Z') + self.assertEqual(fn('**/*'), r'(?s:(?:[/\\]|[^/\\]+[/\\])*[^/\\]+)\Z') @skip_unless_symlink From 162430890039fdbae88230536b9951e110e2ba11 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 10 Mar 2024 16:33:09 +0100 Subject: [PATCH 2/5] glob.translate: don't match empty path part at beginning of string --- Lib/glob.py | 2 ++ Lib/test/test_glob.py | 9 ++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/Lib/glob.py b/Lib/glob.py index 926ab24ada2663..8febfe3a82d8d6 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -294,6 +294,8 @@ def translate(pat, *, recursive=False, include_hidden=False, seps=None): continue if recursive: if part == '**': + if idx == 0: + results.append(fr'(?!{any_sep})') if idx < last_part_idx: if parts[idx + 1] != '**': results.append(any_segments) diff --git a/Lib/test/test_glob.py b/Lib/test/test_glob.py index fb358e9d4c39d4..64dfd84bec4fc5 100644 --- a/Lib/test/test_glob.py +++ b/Lib/test/test_glob.py @@ -397,6 +397,8 @@ def test_translate_matching(self): self.assertIsNotNone(match(os.path.join('foo', 'bar.txt'))) self.assertIsNone(match(os.path.join('foo', 
'.bar.txt'))) match = re.compile(glob.translate('**/*', recursive=True, include_hidden=True)).match + self.assertIsNotNone(match('baz')) + self.assertIsNone(match('/baz')) self.assertIsNotNone(match('foo//baz')) self.assertIsNotNone(match('.foo//baz')) self.assertIsNotNone(match('.foo/bar//baz')) @@ -445,8 +447,9 @@ def fn(pat): return glob.translate(pat, recursive=True, include_hidden=True, seps='/') self.assertEqual(fn('*'), r'(?s:[^/]+)\Z') self.assertEqual(fn('?'), r'(?s:[^/])\Z') - self.assertEqual(fn('**'), r'(?s:.*)\Z') - self.assertEqual(fn('**/**'), r'(?s:.*)\Z') + self.assertEqual(fn('**'), r'(?s:(?!/).*)\Z') + self.assertEqual(fn('/**'), r'(?s:/.*)\Z') + self.assertEqual(fn('**/**'), r'(?s:(?!/).*)\Z') self.assertRaises(ValueError, fn, '***') self.assertRaises(ValueError, fn, 'a**') self.assertRaises(ValueError, fn, '**b') @@ -456,7 +459,7 @@ def test_translate_seps(self): def fn(pat): return glob.translate(pat, recursive=True, include_hidden=True, seps=['/', '\\']) self.assertEqual(fn('foo/bar\\baz'), r'(?s:foo[/\\]bar[/\\]baz)\Z') - self.assertEqual(fn('**/*'), r'(?s:(?:[/\\]|[^/\\]+[/\\])*[^/\\]+)\Z') + self.assertEqual(fn('**/*'), r'(?s:(?![/\\])(?:[/\\]|[^/\\]+[/\\])*[^/\\]+)\Z') @skip_unless_symlink From 0e6d7ffcb017c610d76c7426fe044da3c32bf1a2 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 10 Mar 2024 17:19:36 +0100 Subject: [PATCH 3/5] add news entry --- .../next/Library/2024-03-10-17-19-09.gh-issue-116393.JmhKY4.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2024-03-10-17-19-09.gh-issue-116393.JmhKY4.rst diff --git a/Misc/NEWS.d/next/Library/2024-03-10-17-19-09.gh-issue-116393.JmhKY4.rst b/Misc/NEWS.d/next/Library/2024-03-10-17-19-09.gh-issue-116393.JmhKY4.rst new file mode 100644 index 00000000000000..eb1f2e5a785870 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-03-10-17-19-09.gh-issue-116393.JmhKY4.rst @@ -0,0 +1 @@ +In :func:`glob.translate`, ``**`` now matches empty path parts (such as the one in ``foo//bar``) when *recursive* is true. 
From eaf7ce2992ea582ae007929a9c69d6f6b1ef74b3 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 10 Mar 2024 18:12:12 +0100 Subject: [PATCH 4/5] Revert "glob.translate: don't match empty path part at beginning of string" This reverts commit 162430890039fdbae88230536b9951e110e2ba11. --- Lib/glob.py | 2 -- Lib/test/test_glob.py | 9 +++------ 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/Lib/glob.py b/Lib/glob.py index 8febfe3a82d8d6..926ab24ada2663 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -294,8 +294,6 @@ def translate(pat, *, recursive=False, include_hidden=False, seps=None): continue if recursive: if part == '**': - if idx == 0: - results.append(fr'(?!{any_sep})') if idx < last_part_idx: if parts[idx + 1] != '**': results.append(any_segments) diff --git a/Lib/test/test_glob.py b/Lib/test/test_glob.py index 64dfd84bec4fc5..fb358e9d4c39d4 100644 --- a/Lib/test/test_glob.py +++ b/Lib/test/test_glob.py @@ -397,8 +397,6 @@ def test_translate_matching(self): self.assertIsNotNone(match(os.path.join('foo', 'bar.txt'))) self.assertIsNone(match(os.path.join('foo', '.bar.txt'))) match = re.compile(glob.translate('**/*', recursive=True, include_hidden=True)).match - self.assertIsNotNone(match('baz')) - self.assertIsNone(match('/baz')) self.assertIsNotNone(match('foo//baz')) self.assertIsNotNone(match('.foo//baz')) self.assertIsNotNone(match('.foo/bar//baz')) @@ -447,9 +445,8 @@ def fn(pat): return glob.translate(pat, recursive=True, include_hidden=True, seps='/') self.assertEqual(fn('*'), r'(?s:[^/]+)\Z') self.assertEqual(fn('?'), r'(?s:[^/])\Z') - self.assertEqual(fn('**'), r'(?s:(?!/).*)\Z') - self.assertEqual(fn('/**'), r'(?s:/.*)\Z') - self.assertEqual(fn('**/**'), r'(?s:(?!/).*)\Z') + self.assertEqual(fn('**'), r'(?s:.*)\Z') + self.assertEqual(fn('**/**'), r'(?s:.*)\Z') self.assertRaises(ValueError, fn, '***') self.assertRaises(ValueError, fn, 'a**') self.assertRaises(ValueError, fn, '**b') @@ -459,7 +456,7 @@ def test_translate_seps(self): 
def fn(pat): return glob.translate(pat, recursive=True, include_hidden=True, seps=['/', '\\']) self.assertEqual(fn('foo/bar\\baz'), r'(?s:foo[/\\]bar[/\\]baz)\Z') - self.assertEqual(fn('**/*'), r'(?s:(?![/\\])(?:[/\\]|[^/\\]+[/\\])*[^/\\]+)\Z') + self.assertEqual(fn('**/*'), r'(?s:(?:[/\\]|[^/\\]+[/\\])*[^/\\]+)\Z') @skip_unless_symlink From b0d1c778402e43c7551bc5a641a4c59e41b69f7b Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 10 Mar 2024 18:29:51 +0100 Subject: [PATCH 5/5] docs: fix glob.translate example --- Doc/library/glob.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/glob.rst b/Doc/library/glob.rst index 19a0bbba8966ba..8e1056849817bd 100644 --- a/Doc/library/glob.rst +++ b/Doc/library/glob.rst @@ -126,7 +126,7 @@ The :mod:`glob` module defines the following functions: >>> >>> regex = glob.translate('**/*.txt', recursive=True, include_hidden=True) >>> regex - '(?s:(?:.+/)?[^/]*\\.txt)\\Z' + '(?s:(?:/|[^/]+/)*[^/]*\\.txt)\\Z' >>> reobj = re.compile(regex) >>> reobj.match('foo/bar/baz.txt')