Summary of this patch (text before the first "diff --git" header is ignored
by patch tools):
  * Lib/posixpath.py: realpath() no longer recurses through the private
    _joinrealpath() helper; it walks an explicit stack of unresolved path
    parts, keeps the resolved path absolute throughout (so the trailing
    abspath() call is dropped), and stops querying the filesystem after a
    symlink loop is found in non-strict mode.
  * Lib/test/test_posixpath.py: adds test_realpath_missing_pardir.
  * Misc/NEWS.d: adds the changelog entry.

diff --git a/Lib/posixpath.py b/Lib/posixpath.py
index 76ee721bfb5e33..0e8bb5ab10d916 100644
--- a/Lib/posixpath.py
+++ b/Lib/posixpath.py
@@ -403,55 +403,66 @@ def realpath(filename, *, strict=False):
     """Return the canonical path of the specified filename, eliminating any
 symbolic links encountered in the path."""
     filename = os.fspath(filename)
-    path, ok = _joinrealpath(filename[:0], filename, strict, {})
-    return abspath(path)
-
-# Join two paths, normalizing and eliminating any symbolic links
-# encountered in the second path.
-# Two leading slashes are replaced by a single slash.
-def _joinrealpath(path, rest, strict, seen):
-    if isinstance(path, bytes):
+    if isinstance(filename, bytes):
         sep = b'/'
         curdir = b'.'
         pardir = b'..'
+        getcwd = os.getcwdb
     else:
         sep = '/'
         curdir = '.'
         pardir = '..'
+        getcwd = os.getcwd
+
+    # The stack of unresolved path parts. When popped, a special value of None
+    # indicates that a symlink target has been resolved, and that the original
+    # symlink path can be retrieved by popping again. The [::-1] slice is a
+    # very fast way of spelling list(reversed(...)).
+    rest = filename.split(sep)[::-1]
+
+    # The resolved path, which is absolute throughout this function.
+    # Note: getcwd() returns a normalized and symlink-free path.
+    path = sep if filename.startswith(sep) else getcwd()
 
-    if rest.startswith(sep):
-        rest = rest[1:]
-        path = sep
+    # Mapping from symlink paths to *fully resolved* symlink targets. If a
+    # symlink is encountered but not yet resolved, the value is None. This is
+    # used both to detect symlink loops and to speed up repeated traversals of
+    # the same links.
+    seen = {}
+
+    # Whether we're calling lstat() and readlink() to resolve symlinks. If we
+    # encounter an OSError for a symlink loop in non-strict mode, this is
+    # switched off.
+    querying = True
 
     while rest:
-        name, _, rest = rest.partition(sep)
+        name = rest.pop()
+        if name is None:
+            # resolved symlink target
+            seen[rest.pop()] = path
+            continue
         if not name or name == curdir:
             # current dir
             continue
         if name == pardir:
             # parent dir
-            if path:
-                parent, name = split(path)
-                if name == pardir:
-                    # ../..
-                    path = join(path, pardir)
-                else:
-                    # foo/bar/.. -> foo
-                    path = parent
-            else:
-                # ..
-                path = pardir
+            path = path[:path.rindex(sep)] or sep
+            continue
+        if path == sep:
+            newpath = path + name
+        else:
+            newpath = path + sep + name
+        if not querying:
+            path = newpath
             continue
-        newpath = join(path, name)
         try:
             st = os.lstat(newpath)
+            if not stat.S_ISLNK(st.st_mode):
+                path = newpath
+                continue
         except OSError:
             if strict:
                 raise
-            is_link = False
-        else:
-            is_link = stat.S_ISLNK(st.st_mode)
-        if not is_link:
             path = newpath
             continue
         # Resolve the symbolic link
@@ -467,14 +478,23 @@ def _joinrealpath(path, rest, strict, seen):
                 os.stat(newpath)
             else:
                 # Return already resolved part + rest of the path unchanged.
-                return join(newpath, rest), False
+                path = newpath
+                querying = False
+                continue
         seen[newpath] = None # not resolved symlink
-        path, ok = _joinrealpath(path, os.readlink(newpath), strict, seen)
-        if not ok:
-            return join(path, rest), False
-        seen[newpath] = path # resolved symlink
+        target = os.readlink(newpath)
+        if target.startswith(sep):
+            # Symlink target is absolute; reset resolved path.
+            path = sep
+        # Push the symlink path onto the stack, and signal its specialness by
+        # also pushing None. When these entries are popped, we'll record the
+        # fully-resolved symlink target in the 'seen' mapping.
+        rest.append(newpath)
+        rest.append(None)
+        # Push the unresolved symlink target parts onto the stack.
+        rest.extend(target.split(sep)[::-1])
 
-    return path, True
+    return path
 
 
 supports_unicode_filenames = (sys.platform == 'darwin')
diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py
index cbb7c4c52d9697..807f985f7f4df7 100644
--- a/Lib/test/test_posixpath.py
+++ b/Lib/test/test_posixpath.py
@@ -456,6 +456,15 @@ def test_realpath_relative(self):
         finally:
             os_helper.unlink(ABSTFN)
 
+    @os_helper.skip_unless_symlink
+    @skip_if_ABSTFN_contains_backslash
+    def test_realpath_missing_pardir(self):
+        try:
+            os.symlink(os_helper.TESTFN + "1", os_helper.TESTFN)
+            self.assertEqual(realpath("nonexistent/../" + os_helper.TESTFN), ABSTFN + "1")
+        finally:
+            os_helper.unlink(os_helper.TESTFN)
+
     @os_helper.skip_unless_symlink
     @skip_if_ABSTFN_contains_backslash
     def test_realpath_symlink_loops(self):
diff --git a/Misc/NEWS.d/next/Library/2024-02-01-08-09-20.gh-issue-114847.-JrWrR.rst b/Misc/NEWS.d/next/Library/2024-02-01-08-09-20.gh-issue-114847.-JrWrR.rst
new file mode 100644
index 00000000000000..bf011fed3efdbc
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-02-01-08-09-20.gh-issue-114847.-JrWrR.rst
@@ -0,0 +1 @@
+Speed up :func:`os.path.realpath` on non-Windows platforms.