Skip to content

bpo-38764: Deterministic globbing. #17105

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions Doc/library/glob.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,21 @@
single: . (dot); in glob-style wildcards

The :mod:`glob` module finds all the pathnames matching a specified pattern
according to the rules used by the Unix shell, although results are returned in
arbitrary order. No tilde expansion is done, but ``*``, ``?``, and character
according to the rules used by the Unix shell.
No tilde expansion is done, but ``*``, ``?``, and character
ranges expressed with ``[]`` will be correctly matched. This is done by using
the :func:`os.scandir` and :func:`fnmatch.fnmatch` functions in concert, and
not by actually invoking a subshell. Note that unlike :func:`fnmatch.fnmatch`,
:mod:`glob` treats filenames beginning with a dot (``.``) as special cases.
(For tilde and shell variable expansion, use :func:`os.path.expanduser` and
:func:`os.path.expandvars`.)

The order of returned results is consistent across all platforms: the children
of each directory are always traversed in sorted order (by Unicode code point
for :class:`str` patterns, and by byte value for :class:`bytes` patterns).
**However, this does not necessarily mean that the entire sequence will be
returned in sorted order, since recursive patterns use a breadth-first
search.**

For a literal match, wrap the meta-characters in brackets.
For example, ``'[?]'`` matches the character ``'?'``.

Expand All @@ -42,8 +48,7 @@ For example, ``'[?]'`` matches the character ``'?'``.
a string containing a path specification. *pathname* can be either absolute
(like :file:`/usr/src/Python-1.5/Makefile`) or relative (like
:file:`../../Tools/\*/\*.gif`), and can contain shell-style wildcards. Broken
symlinks are included in the results (as in the shell). Whether or not the
results are sorted depends on the file system.
symlinks are included in the results (as in the shell).

.. index::
single: **; in glob-style wildcards
Expand All @@ -62,6 +67,9 @@ For example, ``'[?]'`` matches the character ``'?'``.
.. versionchanged:: 3.5
Support for recursive globs using "``**``".

.. versionchanged:: 3.9
Guaranteed traversal order for all platforms.


.. function:: iglob(pathname, *, recursive=False)

Expand Down
4 changes: 2 additions & 2 deletions Lib/glob.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def _iglob(pathname, recursive, dironly):
# takes a literal basename (so it only has to check for its existence).

def _glob1(dirname, pattern, dironly):
names = list(_iterdir(dirname, dironly))
names = sorted(_iterdir(dirname, dironly))
if not _ishidden(pattern):
names = (x for x in names if not _ishidden(x))
return fnmatch.filter(names, pattern)
Expand Down Expand Up @@ -132,7 +132,7 @@ def _iterdir(dirname, dironly):

# Recursively yields relative pathnames inside a literal directory.
def _rlistdir(dirname, dironly):
names = list(_iterdir(dirname, dironly))
names = sorted(_iterdir(dirname, dironly))
for x in names:
if not _ishidden(x):
yield x
Expand Down
65 changes: 31 additions & 34 deletions Lib/test/test_glob.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,11 @@ def glob(self, *parts, **kwargs):
self.assertCountEqual(glob.iglob(os.fsencode(p), **kwargs), bres)
return res

def assertSequencesEqual_noorder(self, l1, l2):
l1 = list(l1)
l2 = list(l2)
self.assertEqual(set(l1), set(l2))
self.assertEqual(sorted(l1), sorted(l2))
def assertSequencesEqual(self, l1, l2):
self.assertListEqual(list(l1), list(l2))

def test_glob_literal(self):
eq = self.assertSequencesEqual_noorder
eq = self.assertSequencesEqual
eq(self.glob('a'), [self.norm('a')])
eq(self.glob('a', 'D'), [self.norm('a', 'D')])
eq(self.glob('aab'), [self.norm('aab')])
Expand All @@ -79,8 +76,8 @@ def test_glob_literal(self):
self.assertEqual({type(r) for r in res}, {bytes})

def test_glob_one_directory(self):
eq = self.assertSequencesEqual_noorder
eq(self.glob('a*'), map(self.norm, ['a', 'aab', 'aaa']))
eq = self.assertSequencesEqual
eq(self.glob('a*'), map(self.norm, ['a', 'aaa', 'aab']))
eq(self.glob('*a'), map(self.norm, ['a', 'aaa']))
eq(self.glob('.*'), map(self.norm, ['.aa', '.bb']))
eq(self.glob('?aa'), map(self.norm, ['aaa']))
Expand All @@ -89,7 +86,7 @@ def test_glob_one_directory(self):
eq(self.glob('*q'), [])

def test_glob_nested_directory(self):
eq = self.assertSequencesEqual_noorder
eq = self.assertSequencesEqual
if os.path.normcase("abCD") == "abCD":
# case-sensitive filesystem
eq(self.glob('a', 'bcd', 'E*'), [self.norm('a', 'bcd', 'EF')])
Expand All @@ -100,7 +97,7 @@ def test_glob_nested_directory(self):
eq(self.glob('a', 'bcd', '*g'), [self.norm('a', 'bcd', 'efg')])

def test_glob_directory_names(self):
eq = self.assertSequencesEqual_noorder
eq = self.assertSequencesEqual
eq(self.glob('*', 'D'), [self.norm('a', 'D')])
eq(self.glob('*', '*a'), [])
eq(self.glob('a', '*', '*', '*a'),
Expand Down Expand Up @@ -143,7 +140,7 @@ def test_glob_bytes_directory_with_trailing_slash(self):

@skip_unless_symlink
def test_glob_symlinks(self):
eq = self.assertSequencesEqual_noorder
eq = self.assertSequencesEqual
eq(self.glob('sym3'), [self.norm('sym3')])
eq(self.glob('sym3', '*'), [self.norm('sym3', 'EF'),
self.norm('sym3', 'efg')])
Expand All @@ -155,15 +152,15 @@ def test_glob_symlinks(self):

@skip_unless_symlink
def test_glob_broken_symlinks(self):
eq = self.assertSequencesEqual_noorder
eq = self.assertSequencesEqual
eq(self.glob('sym*'), [self.norm('sym1'), self.norm('sym2'),
self.norm('sym3')])
eq(self.glob('sym1'), [self.norm('sym1')])
eq(self.glob('sym2'), [self.norm('sym2')])

@unittest.skipUnless(sys.platform == "win32", "Win32 specific test")
def test_glob_magic_in_drive(self):
eq = self.assertSequencesEqual_noorder
eq = self.assertSequencesEqual
eq(glob.glob('*:'), [])
eq(glob.glob(b'*:'), [])
eq(glob.glob('?:'), [])
Expand Down Expand Up @@ -200,23 +197,23 @@ def rglob(self, *parts, **kwargs):
return self.glob(*parts, recursive=True, **kwargs)

def test_recursive_glob(self):
eq = self.assertSequencesEqual_noorder
full = [('EF',), ('ZZZ',),
('a',), ('a', 'D'),
('a', 'bcd'),
('a', 'bcd', 'EF'),
('a', 'bcd', 'efg'),
('a', 'bcd', 'efg', 'ha'),
('aaa',), ('aaa', 'zzzF'),
('aab',), ('aab', 'F'),
]
eq = self.assertSequencesEqual
bfs = [('EF',), ('ZZZ',), ('a',), ('aaa',), ('aab',)]
if can_symlink():
full += [('sym1',), ('sym2',),
('sym3',),
('sym3', 'EF'),
('sym3', 'efg'),
('sym3', 'efg', 'ha'),
]
bfs += [('sym1',), ('sym2',), ('sym3',)]
bfs += [
('a', 'D'), ('a', 'bcd'),
('a', 'bcd', 'EF'), ('a', 'bcd', 'efg'),
('a', 'bcd', 'efg', 'ha'),
('aaa', 'zzzF'),
('aab', 'F'),
]
if can_symlink():
bfs += [
('sym3', 'EF'), ('sym3', 'efg'),
('sym3', 'efg', 'ha'),
]
full = sorted(bfs)
eq(self.rglob('**'), self.joins(('',), *full))
eq(self.rglob(os.curdir, '**'),
self.joins((os.curdir, ''), *((os.curdir,) + i for i in full)))
Expand All @@ -230,11 +227,11 @@ def test_recursive_glob(self):
('a', ''), ('a', 'D'), ('a', 'bcd'), ('a', 'bcd', 'EF'),
('a', 'bcd', 'efg'), ('a', 'bcd', 'efg', 'ha')))
eq(self.rglob('a**'), self.joins(('a',), ('aaa',), ('aab',)))
expect = [('a', 'bcd', 'EF'), ('EF',)]
expect = [('EF',), ('a', 'bcd', 'EF')]
if can_symlink():
expect += [('sym3', 'EF')]
eq(self.rglob('**', 'EF'), self.joins(*expect))
expect = [('a', 'bcd', 'EF'), ('aaa', 'zzzF'), ('aab', 'F'), ('EF',)]
expect = [('EF',), ('a', 'bcd', 'EF'), ('aaa', 'zzzF'), ('aab', 'F'),]
if can_symlink():
expect += [('sym3', 'EF')]
eq(self.rglob('**', '*F'), self.joins(*expect))
Expand All @@ -249,17 +246,17 @@ def test_recursive_glob(self):
eq(glob.glob(join('**', ''), recursive=True),
[join(*i) for i in dirs])
eq(glob.glob(join('**', '*'), recursive=True),
[join(*i) for i in full])
[join(*i) for i in bfs])
eq(glob.glob(join(os.curdir, '**'), recursive=True),
[join(os.curdir, '')] + [join(os.curdir, *i) for i in full])
eq(glob.glob(join(os.curdir, '**', ''), recursive=True),
[join(os.curdir, '')] + [join(os.curdir, *i) for i in dirs])
eq(glob.glob(join(os.curdir, '**', '*'), recursive=True),
[join(os.curdir, *i) for i in full])
[join(os.curdir, *i) for i in bfs])
eq(glob.glob(join('**','zz*F'), recursive=True),
[join('aaa', 'zzzF')])
eq(glob.glob('**zz*F', recursive=True), [])
expect = [join('a', 'bcd', 'EF'), 'EF']
expect = ['EF', join('a', 'bcd', 'EF'),]
if can_symlink():
expect += [join('sym3', 'EF')]
eq(glob.glob(join('**', 'EF'), recursive=True), expect)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Guaranteed traversal order for :func:`glob.glob` and :func:`glob.iglob` on all platforms.