Skip to content

Commit

Permalink
GH-101362: Optimise PurePath(PurePath(...)) (GH-101667)
Browse files Browse the repository at this point in the history
The previous `_parse_args()` method pulled the `_parts` out of any supplied `PurePath` objects; these were subsequently joined in `_from_parts()` using `os.path.join()`. This is actually a slower form of joining than calling `fspath()` on the path object, because it doesn't take advantage of the fact that the contents of `_parts` are normalized!

This reduces the time taken to run `PurePath("foo", "bar")` by ~20%, and the time taken to run `PurePath(p, "cheese")`, where `p = PurePath("/foo", "bar", "baz")`, by ~40%.

Automerge-Triggered-By: GH:AlexWaygood
  • Loading branch information
barneygale committed Mar 5, 2023
1 parent 3e60e02 commit 6716254
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 27 deletions.
5 changes: 3 additions & 2 deletions Doc/library/pathlib.rst
Expand Up @@ -105,8 +105,9 @@ we also call *flavours*:
PurePosixPath('setup.py')

Each element of *pathsegments* can be either a string representing a
path segment, an object implementing the :class:`os.PathLike` interface
which returns a string, or another path object::
path segment, or an object implementing the :class:`os.PathLike` interface
where the :meth:`~os.PathLike.__fspath__` method returns a string,
such as another path object::

>>> PurePath('foo', 'some/path', 'bar')
PurePosixPath('foo/some/path/bar')
Expand Down
36 changes: 11 additions & 25 deletions Lib/pathlib.py
Expand Up @@ -281,6 +281,14 @@ def _parse_parts(cls, parts):
path = cls._flavour.join(*parts)
sep = cls._flavour.sep
altsep = cls._flavour.altsep
if isinstance(path, str):
# Force-cast str subclasses to str (issue #21127)
path = str(path)
else:
raise TypeError(
"argument should be a str or an os.PathLike "
"object where __fspath__ returns a str, "
f"not {type(path).__name__!r}")
if altsep:
path = path.replace(altsep, sep)
drv, root, rel = cls._flavour.splitroot(path)
Expand All @@ -291,32 +299,10 @@ def _parse_parts(cls, parts):
parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.']
return drv, root, parsed

@classmethod
def _parse_args(cls, args):
# This is useful when you don't want to create an instance, just
# canonicalize some constructor arguments.
parts = []
for a in args:
if isinstance(a, PurePath):
parts += a._parts
else:
a = os.fspath(a)
if isinstance(a, str):
# Force-cast str subclasses to str (issue #21127)
parts.append(str(a))
else:
raise TypeError(
"argument should be a str object or an os.PathLike "
"object returning str, not %r"
% type(a))
return cls._parse_parts(parts)

@classmethod
def _from_parts(cls, args):
# We need to call _parse_args on the instance, so as to get the
# right flavour.
self = object.__new__(cls)
drv, root, parts = self._parse_args(args)
drv, root, parts = self._parse_parts(args)
self._drv = drv
self._root = root
self._parts = parts
Expand Down Expand Up @@ -575,7 +561,7 @@ def joinpath(self, *args):
anchored).
"""
drv1, root1, parts1 = self._drv, self._root, self._parts
drv2, root2, parts2 = self._parse_args(args)
drv2, root2, parts2 = self._parse_parts(args)
if root2:
if not drv2 and drv1:
return self._from_parsed_parts(drv1, root2, [drv1 + root2] + parts2[1:])
Expand Down Expand Up @@ -662,7 +648,7 @@ def match(self, path_pattern):
return True

# Can't subclass os.PathLike from PurePath and keep the constructor
# optimizations in PurePath._parse_args().
# optimizations in PurePath.__slots__.
os.PathLike.register(PurePath)


Expand Down
27 changes: 27 additions & 0 deletions Lib/test/test_pathlib.py
Expand Up @@ -166,6 +166,33 @@ def test_constructor_common(self):
self.assertEqual(P(P('a'), P('b')), P('a/b'))
self.assertEqual(P(P('a'), P('b'), P('c')), P(FakePath("a/b/c")))

def test_bytes(self):
P = self.cls
message = (r"argument should be a str or an os\.PathLike object "
r"where __fspath__ returns a str, not 'bytes'")
with self.assertRaisesRegex(TypeError, message):
P(b'a')
with self.assertRaises(TypeError):
P(b'a', 'b')
with self.assertRaises(TypeError):
P('a', b'b')
with self.assertRaises(TypeError):
P('a').joinpath(b'b')
with self.assertRaises(TypeError):
P('a') / b'b'
with self.assertRaises(TypeError):
b'a' / P('b')
with self.assertRaises(TypeError):
P('a').match(b'b')
with self.assertRaises(TypeError):
P('a').relative_to(b'b')
with self.assertRaises(TypeError):
P('a').with_name(b'b')
with self.assertRaises(TypeError):
P('a').with_stem(b'b')
with self.assertRaises(TypeError):
P('a').with_suffix(b'b')

def _check_str_subclass(self, *args):
# Issue #21127: it should be possible to construct a PurePath object
# from a str subclass instance, and it then gets converted to
Expand Down
@@ -0,0 +1,4 @@
Speed up :class:`pathlib.PurePath` construction by handling arguments more
uniformly. When a :class:`pathlib.Path` argument is supplied,
we use its string representation rather than joining its parts
with :func:`os.path.join`.

0 comments on commit 6716254

Please sign in to comment.