Skip to content

Commit

Permalink
GH-76846, GH-85281: Call __new__() and __init__() on pathlib subclasses (GH-102789)
Browse files Browse the repository at this point in the history

Fix an issue where `__new__()` and `__init__()` were not called on subclasses of `pathlib.PurePath` and `Path` in some circumstances.

Paths are now normalized on demand. This speeds up path construction, `p.joinpath(q)`, and `p / q`.

Co-authored-by: Steve Dower <steve.dower@microsoft.com>
  • Loading branch information
barneygale and zooba committed Apr 3, 2023
1 parent 2a72125 commit 11c3020
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 68 deletions.
145 changes: 78 additions & 67 deletions Lib/pathlib.py
Expand Up @@ -16,7 +16,6 @@
import warnings
from _collections_abc import Sequence
from errno import ENOENT, ENOTDIR, EBADF, ELOOP
from operator import attrgetter
from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
from urllib.parse import quote_from_bytes as urlquote_from_bytes

Expand Down Expand Up @@ -216,8 +215,8 @@ class _PathParents(Sequence):
def __init__(self, path):
# We don't store the instance to avoid reference cycles
self._pathcls = type(path)
self._drv = path._drv
self._root = path._root
self._drv = path.drive
self._root = path.root
self._parts = path._parts

def __len__(self):
Expand Down Expand Up @@ -251,36 +250,33 @@ class PurePath(object):
directly, regardless of your system.
"""
__slots__ = (
'_drv', '_root', '_parts',
'_raw_path', '_drv', '_root', '_parts_cached',
'_str', '_hash', '_parts_tuple', '_parts_normcase_cached',
)
_flavour = os.path

def __new__(cls, *args):
def __new__(cls, *args, **kwargs):
"""Construct a PurePath from one or several strings and or existing
PurePath objects. The strings and path objects are combined so as
to yield a canonicalized path, which is incorporated into the
new PurePath object.
"""
if cls is PurePath:
cls = PureWindowsPath if os.name == 'nt' else PurePosixPath
return cls._from_parts(args)
return object.__new__(cls)

def __reduce__(self):
# Using the parts tuple helps share interned path parts
# when pickling related paths.
return (self.__class__, tuple(self._parts))
return (self.__class__, self.parts)

@classmethod
def _parse_parts(cls, parts):
if not parts:
return '', '', []
elif len(parts) == 1:
path = os.fspath(parts[0])
def __init__(self, *args):
if not args:
path = ''
elif len(args) == 1:
path = os.fspath(args[0])
else:
path = cls._flavour.join(*parts)
sep = cls._flavour.sep
altsep = cls._flavour.altsep
path = self._flavour.join(*args)
if isinstance(path, str):
# Force-cast str subclasses to str (issue #21127)
path = str(path)
Expand All @@ -289,6 +285,14 @@ def _parse_parts(cls, parts):
"argument should be a str or an os.PathLike "
"object where __fspath__ returns a str, "
f"not {type(path).__name__!r}")
self._raw_path = path

@classmethod
def _parse_path(cls, path):
if not path:
return '', '', []
sep = cls._flavour.sep
altsep = cls._flavour.altsep
if altsep:
path = path.replace(altsep, sep)
drv, root, rel = cls._flavour.splitroot(path)
Expand All @@ -299,21 +303,20 @@ def _parse_parts(cls, parts):
parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.']
return drv, root, parsed

@classmethod
def _from_parts(cls, args):
self = object.__new__(cls)
drv, root, parts = self._parse_parts(args)
def _load_parts(self):
drv, root, parts = self._parse_path(self._raw_path)
self._drv = drv
self._root = root
self._parts = parts
return self
self._parts_cached = parts

@classmethod
def _from_parsed_parts(cls, drv, root, parts):
self = object.__new__(cls)
path = cls._format_parsed_parts(drv, root, parts)
self = cls(path)
self._str = path or '.'
self._drv = drv
self._root = root
self._parts = parts
self._parts_cached = parts
return self

@classmethod
Expand All @@ -330,7 +333,7 @@ def __str__(self):
try:
return self._str
except AttributeError:
self._str = self._format_parsed_parts(self._drv, self._root,
self._str = self._format_parsed_parts(self.drive, self.root,
self._parts) or '.'
return self._str

Expand All @@ -356,7 +359,7 @@ def as_uri(self):
if not self.is_absolute():
raise ValueError("relative path can't be expressed as a file URI")

drive = self._drv
drive = self.drive
if len(drive) == 2 and drive[1] == ':':
# It's a path on a local drive => 'file:///c:/a/b'
prefix = 'file:///' + drive
Expand Down Expand Up @@ -412,23 +415,43 @@ def __ge__(self, other):
return NotImplemented
return self._parts_normcase >= other._parts_normcase

drive = property(attrgetter('_drv'),
doc="""The drive prefix (letter or UNC path), if any.""")
@property
def drive(self):
"""The drive prefix (letter or UNC path), if any."""
try:
return self._drv
except AttributeError:
self._load_parts()
return self._drv

@property
def root(self):
"""The root of the path, if any."""
try:
return self._root
except AttributeError:
self._load_parts()
return self._root

root = property(attrgetter('_root'),
doc="""The root of the path, if any.""")
@property
def _parts(self):
try:
return self._parts_cached
except AttributeError:
self._load_parts()
return self._parts_cached

@property
def anchor(self):
"""The concatenation of the drive and root, or ''."""
anchor = self._drv + self._root
anchor = self.drive + self.root
return anchor

@property
def name(self):
"""The final path component, if any."""
parts = self._parts
if len(parts) == (1 if (self._drv or self._root) else 0):
if len(parts) == (1 if (self.drive or self.root) else 0):
return ''
return parts[-1]

Expand Down Expand Up @@ -477,7 +500,7 @@ def with_name(self, name):
drv, root, tail = f.splitroot(name)
if drv or root or not tail or f.sep in tail or (f.altsep and f.altsep in tail):
raise ValueError("Invalid name %r" % (name))
return self._from_parsed_parts(self._drv, self._root,
return self._from_parsed_parts(self.drive, self.root,
self._parts[:-1] + [name])

def with_stem(self, stem):
Expand All @@ -502,7 +525,7 @@ def with_suffix(self, suffix):
name = name + suffix
else:
name = name[:-len(old_suffix)] + suffix
return self._from_parsed_parts(self._drv, self._root,
return self._from_parsed_parts(self.drive, self.root,
self._parts[:-1] + [name])

def relative_to(self, other, /, *_deprecated, walk_up=False):
Expand Down Expand Up @@ -561,22 +584,7 @@ def joinpath(self, *args):
paths) or a totally different path (if one of the arguments is
anchored).
"""
drv1, root1, parts1 = self._drv, self._root, self._parts
drv2, root2, parts2 = self._parse_parts(args)
if root2:
if not drv2 and drv1:
return self._from_parsed_parts(drv1, root2, [drv1 + root2] + parts2[1:])
else:
return self._from_parsed_parts(drv2, root2, parts2)
elif drv2:
if drv2 == drv1 or self._flavour.normcase(drv2) == self._flavour.normcase(drv1):
# Same drive => second path is relative to the first.
return self._from_parsed_parts(drv1, root1, parts1 + parts2[1:])
else:
return self._from_parsed_parts(drv2, root2, parts2)
else:
# Second path is non-anchored (common case).
return self._from_parsed_parts(drv1, root1, parts1 + parts2)
return self.__class__(self._raw_path, *args)

def __truediv__(self, key):
try:
Expand All @@ -586,15 +594,15 @@ def __truediv__(self, key):

def __rtruediv__(self, key):
try:
return self._from_parts([key] + self._parts)
return type(self)(key, self._raw_path)
except TypeError:
return NotImplemented

@property
def parent(self):
"""The logical parent of the path."""
drv = self._drv
root = self._root
drv = self.drive
root = self.root
parts = self._parts
if len(parts) == 1 and (drv or root):
return self
Expand All @@ -610,7 +618,7 @@ def is_absolute(self):
a drive)."""
# ntpath.isabs() is defective - see GH-44626 .
if self._flavour is ntpath:
return bool(self._drv and self._root)
return bool(self.drive and self.root)
return self._flavour.isabs(self)

def is_reserved(self):
Expand All @@ -634,7 +642,7 @@ def match(self, path_pattern):
Return True if this path matches the given pattern.
"""
path_pattern = self._flavour.normcase(path_pattern)
drv, root, pat_parts = self._parse_parts((path_pattern,))
drv, root, pat_parts = self._parse_path(path_pattern)
if not pat_parts:
raise ValueError("empty pattern")
parts = self._parts_normcase
Expand Down Expand Up @@ -687,20 +695,23 @@ class Path(PurePath):
"""
__slots__ = ()

def __new__(cls, *args, **kwargs):
def __init__(self, *args, **kwargs):
if kwargs:
msg = ("support for supplying keyword arguments to pathlib.PurePath "
"is deprecated and scheduled for removal in Python {remove}")
warnings._deprecated("pathlib.PurePath(**kwargs)", msg, remove=(3, 14))
super().__init__(*args)

def __new__(cls, *args, **kwargs):
if cls is Path:
cls = WindowsPath if os.name == 'nt' else PosixPath
return cls._from_parts(args)
return object.__new__(cls)

def _make_child_relpath(self, part):
# This is an optimization used for dir walking. `part` must be
# a single part relative to this path.
parts = self._parts + [part]
return self._from_parsed_parts(self._drv, self._root, parts)
return self._from_parsed_parts(self.drive, self.root, parts)

def __enter__(self):
# In previous versions of pathlib, __exit__() marked this path as
Expand Down Expand Up @@ -770,7 +781,7 @@ def glob(self, pattern):
sys.audit("pathlib.Path.glob", self, pattern)
if not pattern:
raise ValueError("Unacceptable pattern: {!r}".format(pattern))
drv, root, pattern_parts = self._parse_parts((pattern,))
drv, root, pattern_parts = self._parse_path(pattern)
if drv or root:
raise NotImplementedError("Non-relative patterns are unsupported")
if pattern[-1] in (self._flavour.sep, self._flavour.altsep):
Expand All @@ -785,7 +796,7 @@ def rglob(self, pattern):
this subtree.
"""
sys.audit("pathlib.Path.rglob", self, pattern)
drv, root, pattern_parts = self._parse_parts((pattern,))
drv, root, pattern_parts = self._parse_path(pattern)
if drv or root:
raise NotImplementedError("Non-relative patterns are unsupported")
if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep):
Expand All @@ -802,12 +813,12 @@ def absolute(self):
"""
if self.is_absolute():
return self
elif self._drv:
elif self.drive:
# There is a CWD on each drive-letter drive.
cwd = self._flavour.abspath(self._drv)
cwd = self._flavour.abspath(self.drive)
else:
cwd = os.getcwd()
return self._from_parts([cwd] + self._parts)
return type(self)(cwd, self._raw_path)

def resolve(self, strict=False):
"""
Expand All @@ -825,7 +836,7 @@ def check_eloop(e):
except OSError as e:
check_eloop(e)
raise
p = self._from_parts((s,))
p = type(self)(s)

# In non-strict mode, realpath() doesn't raise on symlink loops.
# Ensure we get an exception by calling stat()
Expand Down Expand Up @@ -915,7 +926,7 @@ def readlink(self):
"""
if not hasattr(os, "readlink"):
raise NotImplementedError("os.readlink() not available on this system")
return self._from_parts((os.readlink(self),))
return type(self)(os.readlink(self))

def touch(self, mode=0o666, exist_ok=True):
"""
Expand Down Expand Up @@ -1184,12 +1195,12 @@ def expanduser(self):
""" Return a new path with expanded ~ and ~user constructs
(as returned by os.path.expanduser)
"""
if (not (self._drv or self._root) and
if (not (self.drive or self.root) and
self._parts and self._parts[0][:1] == '~'):
homedir = self._flavour.expanduser(self._parts[0])
if homedir[:1] == "~":
raise RuntimeError("Could not determine home directory.")
drv, root, parts = self._parse_parts((homedir,))
drv, root, parts = self._parse_path(homedir)
return self._from_parsed_parts(drv, root, parts + self._parts[1:])

return self
Expand Down
27 changes: 26 additions & 1 deletion Lib/test/test_pathlib.py
Expand Up @@ -27,7 +27,9 @@
class _BaseFlavourTest(object):

def _check_parse_parts(self, arg, expected):
f = self.cls._parse_parts
def f(parts):
path = self.cls(*parts)._raw_path
return self.cls._parse_path(path)
sep = self.flavour.sep
altsep = self.flavour.altsep
actual = f([x.replace('/', sep) for x in arg])
Expand Down Expand Up @@ -136,6 +138,14 @@ def test_parse_parts(self):
# Tests for the pure classes.
#

class _BasePurePathSubclass(object):
init_called = False

def __init__(self, *args):
super().__init__(*args)
self.init_called = True


class _BasePurePathTest(object):

# Keys are canonical paths, values are list of tuples of arguments
Expand Down Expand Up @@ -221,6 +231,21 @@ def test_str_subclass_common(self):
self._check_str_subclass('a/b.txt')
self._check_str_subclass('/a/b.txt')

def test_init_called_common(self):
class P(_BasePurePathSubclass, self.cls):
pass
p = P('foo', 'bar')
self.assertTrue((p / 'foo').init_called)
self.assertTrue(('foo' / p).init_called)
self.assertTrue(p.joinpath('foo').init_called)
self.assertTrue(p.with_name('foo').init_called)
self.assertTrue(p.with_stem('foo').init_called)
self.assertTrue(p.with_suffix('.foo').init_called)
self.assertTrue(p.relative_to('foo').init_called)
self.assertTrue(p.parent.init_called)
for parent in p.parents:
self.assertTrue(parent.init_called)

def test_join_common(self):
P = self.cls
p = P('a/b')
Expand Down

0 comments on commit 11c3020

Please sign in to comment.