Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-114847: Speed up posixpath.realpath() #114848

Merged
merged 23 commits into from
Apr 5, 2024
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 60 additions & 32 deletions Lib/posixpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,49 +412,55 @@ def realpath(filename, *, strict=False):
"""Return the canonical path of the specified filename, eliminating any
symbolic links encountered in the path."""
filename = os.fspath(filename)
path, ok = _joinrealpath(filename[:0], filename, strict, {})
return abspath(path)

# Join two paths, normalizing and eliminating any symbolic links
# encountered in the second path.
def _joinrealpath(path, rest, strict, seen):
if isinstance(path, bytes):
if isinstance(filename, bytes):
sep = b'/'
curdir = b'.'
pardir = b'..'
getcwd = os.getcwdb
else:
sep = '/'
curdir = '.'
pardir = '..'
getcwd = os.getcwd

# The stack of unresolved path parts. When popped, a special value of None
# indicates that a symlink target has been resolved, and that the original
# symlink path can be retrieved by popping again. The [::-1] slice is a
# very fast way of spelling list(reversed(...)).
rest = filename.split(sep)[::-1]
barneygale marked this conversation as resolved.
Show resolved Hide resolved
barneygale marked this conversation as resolved.
Show resolved Hide resolved

if isabs(rest):
rest = rest[1:]
path = sep
# The resolved path, which is absolute throughout this function.
# Note: getcwd() returns a normalized and symlink-free path.
barneygale marked this conversation as resolved.
Show resolved Hide resolved
path = sep if filename.startswith(sep) else getcwd()

# Mapping from symlink paths to *fully resolved* symlink targets. If a
# symlink is encountered but not yet resolved, the value is None. This is
# used both to detect symlink loops and to speed up repeated traversals of
# the same links.
seen = {}

# Whether we're calling lstat() and readlink() to resolve symlinks. If we
# encounter an OSError in non-strict mode, this is switched off.
barneygale marked this conversation as resolved.
Show resolved Hide resolved
querying = True

while rest:
name, _, rest = rest.partition(sep)
name = rest.pop()
if name is None:
# resolved symlink target
seen[rest.pop()] = path
continue
if not name or name == curdir:
# current dir
continue
if name == pardir:
# parent dir
if path:
path, name = split(path)
if name == pardir:
path = join(path, pardir, pardir)
else:
path = pardir
path = path[:path.rfind(sep)] or sep
barneygale marked this conversation as resolved.
Show resolved Hide resolved
continue
newpath = join(path, name)
try:
st = os.lstat(newpath)
except OSError:
if strict:
raise
is_link = False
if path == sep:
newpath = path + name
else:
is_link = stat.S_ISLNK(st.st_mode)
if not is_link:
newpath = path + sep + name
if not querying:
path = newpath
continue
# Resolve the symbolic link
Expand All @@ -470,14 +476,36 @@ def _joinrealpath(path, rest, strict, seen):
os.stat(newpath)
else:
# Return already resolved part + rest of the path unchanged.
return join(newpath, rest), False
path = newpath
querying = False
continue
try:
st = os.lstat(newpath)
if not stat.S_ISLNK(st.st_mode):
path = newpath
continue
target = os.readlink(newpath)
except OSError:
if strict:
raise
else:
# Stop querying the filesystem; the remaining parts are appended unchanged.
path = newpath
querying = False
continue
if target.startswith(sep):
# Symlink target is absolute; reset resolved path.
path = sep
seen[newpath] = None # not resolved symlink
path, ok = _joinrealpath(path, os.readlink(newpath), strict, seen)
if not ok:
return join(path, rest), False
seen[newpath] = path # resolved symlink
# Push the symlink path onto the stack, and signal its specialness by
# also pushing None. When these entries are popped, we'll record the
# fully-resolved symlink target in the 'seen' mapping.
rest.append(newpath)
rest.append(None)
# Push the unresolved symlink target parts onto the stack.
rest.extend(target.split(sep)[::-1])

return path, True
return path


supports_unicode_filenames = (sys.platform == 'darwin')
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Speed up :func:`os.path.realpath` on non-Windows platforms.
Loading