Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-107465: Add pathlib.Path.from_uri() classmethod. #107640

Merged
merged 13 commits into from
Oct 1, 2023
43 changes: 43 additions & 0 deletions Doc/library/pathlib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -850,6 +850,49 @@ call fails (for example because the path doesn't exist).
.. versionadded:: 3.5


.. classmethod:: Path.from_uri(uri)

Return a new path object from parsing a 'file' URI conforming to
:rfc:`8089`. For example::

>>> Path.from_uri('file:///etc/hosts')
PosixPath('/etc/hosts')

On Windows, DOS device and UNC paths may be parsed from URIs::

>>> Path.from_uri('file:///c:/windows')
WindowsPath('c:/windows')
>>> Path.from_uri('file://server/share')
WindowsPath('//server/share')

Several variant forms are supported::

>>> Path.from_uri('file:////server/share')
WindowsPath('//server/share')
>>> Path.from_uri('file://///server/share')
WindowsPath('//server/share')
>>> Path.from_uri('file:c:/windows')
WindowsPath('c:/windows')
>>> Path.from_uri('file:/c|/windows')
WindowsPath('c:/windows')
barneygale marked this conversation as resolved.
Show resolved Hide resolved

URIs with no slash after the scheme (and no drive letter) are parsed as
relative paths::

>>> Path.from_uri('file:foo/bar')
WindowsPath('foo/bar')
barneygale marked this conversation as resolved.
Show resolved Hide resolved

Users may wish to test the result with :meth:`~PurePath.is_absolute` and
reject relative paths, as these are not portable across processes with
different working directories.
barneygale marked this conversation as resolved.
Show resolved Hide resolved

:func:`os.fsdecode` is used to decode percent-escaped byte sequences, and
so file URIs are not portable across machines with different
:ref:`filesystem encodings <filesystem-encoding>`.

.. versionadded:: 3.13


.. method:: Path.stat(*, follow_symlinks=True)

Return a :class:`os.stat_result` object containing information about this path, like :func:`os.stat`.
Expand Down
3 changes: 3 additions & 0 deletions Doc/whatsnew/3.13.rst
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,9 @@ pathlib
:exc:`NotImplementedError` when a path operation isn't supported.
(Contributed by Barney Gale in :gh:`89812`.)

* Add :meth:`pathlib.Path.from_uri` classmethod.
barneygale marked this conversation as resolved.
Show resolved Hide resolved
(Contributed by Barney Gale in :gh:`107465`.)

* Add support for recursive wildcards in :meth:`pathlib.PurePath.match`.
(Contributed by Barney Gale in :gh:`73435`.)

Expand Down
23 changes: 21 additions & 2 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from _collections_abc import Sequence
from errno import ENOENT, ENOTDIR, EBADF, ELOOP
from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
from urllib.parse import quote_from_bytes as urlquote_from_bytes


__all__ = [
Expand Down Expand Up @@ -433,7 +432,8 @@ def as_uri(self):
# It's a posix path => 'file:///etc/hosts'
prefix = 'file://'
path = str(self)
return prefix + urlquote_from_bytes(os.fsencode(path))
from urllib.parse import quote_from_bytes
return prefix + quote_from_bytes(os.fsencode(path))

@property
def _str_normcase(self):
Expand Down Expand Up @@ -1184,6 +1184,25 @@ def __new__(cls, *args, **kwargs):
cls = WindowsPath if os.name == 'nt' else PosixPath
return object.__new__(cls)

@classmethod
def from_uri(cls, uri):
    """Return a new path from the given 'file' URI.

    A leading 'file:' scheme is stripped when present.  An empty or
    'localhost' authority is dropped, the slash in front of a DOS drive
    or UNC path is removed, and a '|' following the drive letter is
    replaced with ':'.  Percent-escapes are decoded to bytes and then
    converted to text with os.fsdecode() before being passed to *cls*.
    """
    path = uri.removeprefix('file:')
    if path[:3] == '///':
        # Remove empty authority
        path = path[2:]
    elif path[:12] == '//localhost/':
        # Remove 'localhost' authority
        path = path[11:]
    # Compare the third character against a tuple rather than the string
    # ':|': an empty slice is a substring of every string, which would
    # strip the leading slash from short paths such as '/' and '/a'.
    if path[:3] == '///' or (path[:1] == '/' and path[2:3] in (':', '|')):
        # Remove slash before DOS device/UNC path
        path = path[1:]
    if path[1:2] == '|':
        # Replace bar with colon in DOS drive
        path = path[:1] + ':' + path[2:]
    # Imported lazily so that importing this module does not pull in urllib.
    from urllib.parse import unquote_to_bytes
    return cls(os.fsdecode(unquote_to_bytes(path)))

@classmethod
def cwd(cls):
"""Return a new path pointing to the current working directory."""
Expand Down
36 changes: 35 additions & 1 deletion Lib/test/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import tempfile
import unittest
from unittest import mock
from urllib.request import pathname2url

from test.support import import_helper
from test.support import set_recursion_limit
Expand Down Expand Up @@ -2938,6 +2939,21 @@ def test_passing_kwargs_deprecated(self):
with self.assertWarns(DeprecationWarning):
self.cls(foo="bar")

def test_from_uri_common(self):
    """Check URI forms that parse the same way on every path flavour."""
    path_cls = self.cls
    # (URI, path string the URI is expected to parse to)
    cases = (
        ('file:foo/bar', 'foo/bar'),
        ('file:/foo/bar', '/foo/bar'),
        ('file://foo/bar', '//foo/bar'),
        ('file:///foo/bar', '/foo/bar'),
        ('file:////foo/bar', '//foo/bar'),
        ('file://localhost/foo/bar', '/foo/bar'),
    )
    for uri, expected in cases:
        self.assertEqual(path_cls.from_uri(uri), path_cls(expected))

def test_from_uri_pathname2url_common(self):
    """Check that from_uri() accepts the output of pathname2url()."""
    path_cls = self.cls
    # Relative, absolute and double-slash paths, converted in that order.
    for pathname in ('foo/bar', '/foo/bar', '//foo/bar'):
        self.assertEqual(path_cls.from_uri(pathname2url(pathname)),
                         path_cls(pathname))


class WalkTests(unittest.TestCase):

Expand Down Expand Up @@ -3466,7 +3482,25 @@ def check():
env['HOME'] = 'C:\\Users\\eve'
check()


def test_from_uri(self):
    """Check DOS drive, UNC and localhost URI forms."""
    path_cls = self.cls
    # (URI, path string the URI is expected to parse to)
    cases = (
        # DOS drive paths
        ('file:c:/path/to/file', 'c:/path/to/file'),
        ('file:c|/path/to/file', 'c:/path/to/file'),
        ('file:/c|/path/to/file', 'c:/path/to/file'),
        ('file:///c|/path/to/file', 'c:/path/to/file'),
        # UNC paths
        ('file://server/path/to/file', '//server/path/to/file'),
        ('file:////server/path/to/file', '//server/path/to/file'),
        ('file://///server/path/to/file', '//server/path/to/file'),
        # Localhost paths
        ('file://localhost/c:/path/to/file', 'c:/path/to/file'),
        ('file://localhost/c|/path/to/file', 'c:/path/to/file'),
    )
    for uri, expected in cases:
        self.assertEqual(path_cls.from_uri(uri), path_cls(expected))

def test_from_uri_pathname2url(self):
    """Check that from_uri() accepts pathname2url() output for DOS paths."""
    path_cls = self.cls
    # (backslash-separated pathname, path string expected after parsing)
    cases = (
        (r'c:\path\to\file', 'c:/path/to/file'),
        (r'\\server\path\to\file', '//server/path/to/file'),
    )
    for pathname, expected in cases:
        self.assertEqual(path_cls.from_uri(pathname2url(pathname)),
                         path_cls(expected))

class PathSubclassTest(PathTest):
class cls(pathlib.Path):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add :meth:`pathlib.Path.from_uri` classmethod.