Skip to content
32 changes: 24 additions & 8 deletions Doc/library/urllib.request.rst
Original file line number Diff line number Diff line change
Expand Up @@ -147,18 +147,34 @@ The :mod:`urllib.request` module defines the following functions:
attribute to modify its position in the handlers list.


.. function:: pathname2url(path)
.. function:: pathname2url(path, include_scheme=False)

Convert the pathname *path* from the local syntax for a path to the form used in
the path component of a URL. This does not produce a complete URL. The return
value will already be quoted using the :func:`~urllib.parse.quote` function.
Convert the local pathname *path* to a percent-encoded URL. If
*include_scheme* is false (the default), the URL is returned without a
``file:`` scheme prefix; set this argument to true to generate a complete
URL.

.. versionchanged:: 3.14
The *include_scheme* argument was added.

.. function:: url2pathname(path)
.. versionchanged:: 3.14
Generates :rfc:`8089`-compliant file URLs for absolute paths. URLs for
UNC paths on Windows systems begin with two slashes (previously four).
URLs for absolute paths on non-Windows systems begin with three slashes
(previously one).


.. function:: url2pathname(url)

Convert the percent-encoded *url* to a local pathname.

.. versionchanged:: 3.14
Supports :rfc:`8089`-compliant file URLs. Raises
:exc:`~urllib.error.URLError` if a scheme other than ``file:`` is used.
If the URL uses a non-local authority, then on Windows a UNC path is
returned, and on other platforms a :exc:`~urllib.error.URLError`
exception is raised.

Convert the path component *path* from a percent-encoded URL to the local syntax for a
path. This does not accept a complete URL. This function uses
:func:`~urllib.parse.unquote` to decode *path*.

.. function:: getproxies()

Expand Down
22 changes: 22 additions & 0 deletions Doc/whatsnew/3.14.rst
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,28 @@ unittest
(Contributed by Jacob Walls in :gh:`80958`.)


urllib.request
--------------

* Improve support for ``file:`` URIs in :mod:`urllib.request`:

* :func:`~urllib.request.pathname2url` accepts an *include_scheme*
argument, which defaults to false. When set to true, a complete URL
with a ``file:`` prefix is returned.
* :func:`~urllib.request.url2pathname` discards a ``file:`` prefix if given.
* On Windows, :func:`~urllib.request.pathname2url` generates URIs that
begin with two slashes (rather than four) when given a UNC path.
* On non-Windows platforms, :func:`~urllib.request.pathname2url` generates
URIs that begin with three slashes (rather than one) when given an
absolute path. :func:`~urllib.request.url2pathname` performs the opposite
transformation, so ``file:///etc/hosts`` becomes ``/etc/hosts``.
* On non-Windows platforms, :func:`~urllib.request.url2pathname` raises
:exc:`urllib.error.URLError` if the URI includes a non-local authority,
like ``file://other-machine/etc/hosts``.

(Contributed by Barney Gale in :gh:`125866`.)


.. Add improved modules above alphabetically, not here at the end.

Optimizations
Expand Down
4 changes: 2 additions & 2 deletions Lib/nturl2path.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""Convert a NT pathname to a file URL and vice versa.

This module only exists to provide OS-specific code
This module previously provided OS-specific code
for urllib.request, thus do not use directly.
"""
# Testing is done through test_urllib.
# Testing is done through test_nturl2path.

def url2pathname(url):
"""OS-specific conversion from a relative URL of the 'file' scheme
Expand Down
111 changes: 111 additions & 0 deletions Lib/test/test_nturl2path.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import nturl2path
import unittest
import urllib.parse


class nturl2path_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass: a relative path with no drive or
        # special characters converts by swapping the separators only.
        expected_path = "parts\\of\\a\\path"
        expected_url = "parts/of/a/path"
        result = nturl2path.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (result, expected_url))
        result = nturl2path.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (result, expected_path))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = "needs\\quot=ing\\here"
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = nturl2path.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Feed the quoted URL back in: it must decode to the original path.
        expect = given
        result = nturl2path.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = "make sure\\using_quote"
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = nturl2path.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A literal '+' is expected to pass through url2pathname() unchanged.
        given = "make+sure/using_unquote"
        expect = "make+sure\\using_unquote"
        result = nturl2path.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    def test_pathname2url(self):
        # Test special prefixes are correctly handled in pathname2url()
        fn = nturl2path.pathname2url
        self.assertEqual(fn('\\\\?\\C:\\dir'), '///C:/dir')
        self.assertEqual(fn('\\\\?\\unc\\server\\share\\dir'), '/server/share/dir')
        self.assertEqual(fn("C:"), '///C:')
        self.assertEqual(fn("C:\\"), '///C:')
        self.assertEqual(fn('C:\\a\\b.c'), '///C:/a/b.c')
        self.assertEqual(fn('C:\\a\\b%#c'), '///C:/a/b%25%23c')
        self.assertEqual(fn('C:\\a\\b\xe9'), '///C:/a/b%C3%A9')
        self.assertEqual(fn('C:\\foo\\bar\\spam.foo'), "///C:/foo/bar/spam.foo")
        # Long drive letter
        self.assertRaises(IOError, fn, "XX:\\")
        # No drive letter
        self.assertEqual(fn("\\folder\\test\\"), '/folder/test/')
        self.assertEqual(fn("\\\\folder\\test\\"), '////folder/test/')
        self.assertEqual(fn("\\\\\\folder\\test\\"), '/////folder/test/')
        self.assertEqual(fn('\\\\some\\share\\'), '////some/share/')
        self.assertEqual(fn('\\\\some\\share\\a\\b.c'), '////some/share/a/b.c')
        self.assertEqual(fn('\\\\some\\share\\a\\b%#c\xe9'), '////some/share/a/b%25%23c%C3%A9')
        # Round-tripping
        urls = ['///C:',
                '/////folder/test/',
                '///C:/foo/bar/spam.foo']
        for url in urls:
            self.assertEqual(fn(nturl2path.url2pathname(url)), url)

    def test_url2pathname(self):
        # Test drive, UNC and localhost URL forms in url2pathname()
        fn = nturl2path.url2pathname
        self.assertEqual(fn('/C:/'), 'C:\\')
        self.assertEqual(fn("///C|"), 'C:')
        self.assertEqual(fn("///C:"), 'C:')
        self.assertEqual(fn('///C:/'), 'C:\\')
        self.assertEqual(fn('/C|//'), 'C:\\')
        self.assertEqual(fn('///C|/path'), 'C:\\path')
        # No DOS drive
        self.assertEqual(fn("///C/test/"), '\\\\\\C\\test\\')
        self.assertEqual(fn("////C/test/"), '\\\\C\\test\\')
        # DOS drive paths
        self.assertEqual(fn('C:/path/to/file'), 'C:\\path\\to\\file')
        self.assertEqual(fn('C|/path/to/file'), 'C:\\path\\to\\file')
        self.assertEqual(fn('/C|/path/to/file'), 'C:\\path\\to\\file')
        self.assertEqual(fn('///C|/path/to/file'), 'C:\\path\\to\\file')
        self.assertEqual(fn("///C|/foo/bar/spam.foo"), 'C:\\foo\\bar\\spam.foo')
        # Non-ASCII drive letter
        self.assertRaises(IOError, fn, "///\u00e8|/")
        # UNC paths
        self.assertEqual(fn('//server/path/to/file'), '\\\\server\\path\\to\\file')
        self.assertEqual(fn('////server/path/to/file'), '\\\\server\\path\\to\\file')
        self.assertEqual(fn('/////server/path/to/file'), '\\\\\\server\\path\\to\\file')
        # Localhost paths
        self.assertEqual(fn('//localhost/C:/path/to/file'), 'C:\\path\\to\\file')
        self.assertEqual(fn('//localhost/C|/path/to/file'), 'C:\\path\\to\\file')
        # Round-tripping
        paths = ['C:',
                 r'\\\C\test\\',
                 r'C:\foo\bar\spam.foo']
        for path in paths:
            self.assertEqual(fn(nturl2path.pathname2url(path)), path)


# Run the tests defined above when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
51 changes: 28 additions & 23 deletions Lib/test/test_urllib.py
Original file line number Diff line number Diff line change
Expand Up @@ -713,7 +713,7 @@ def constructLocalFileUrl(self, filePath):
filePath.encode("utf-8")
except UnicodeEncodeError:
raise unittest.SkipTest("filePath is not encodable to utf8")
return "file://%s" % urllib.request.pathname2url(filePath)
return urllib.request.pathname2url(filePath, include_scheme=True)

def createNewTempFile(self, data=b""):
"""Creates a new temporary file containing the specified data,
Expand Down Expand Up @@ -1526,15 +1526,17 @@ def test_pathname2url_win(self):
self.assertEqual(fn('\\\\?\\C:\\dir'), '///C:/dir')
self.assertEqual(fn('\\\\?\\unc\\server\\share\\dir'), '//server/share/dir')
self.assertEqual(fn("C:"), '///C:')
self.assertEqual(fn("C:\\"), '///C:')
# Path root is meaningful and should be preserved.
self.assertEqual(fn("C:\\"), '///C:/')
self.assertEqual(fn('C:\\a\\b.c'), '///C:/a/b.c')
self.assertEqual(fn('C:\\a\\b%#c'), '///C:/a/b%25%23c')
self.assertEqual(fn('C:\\a\\b\xe9'), '///C:/a/b%C3%A9')
self.assertEqual(fn('C:\\foo\\bar\\spam.foo'), "///C:/foo/bar/spam.foo")
# Long drive letter
self.assertRaises(IOError, fn, "XX:\\")
# No drive letter
self.assertEqual(fn("\\folder\\test\\"), '/folder/test/')
# Long drive letter: treat as relative path, like ntpath.isabs()/splitroot()
self.assertEqual(fn("XX:\\"), "XX%3A/")
# No drive letter: use empty authority
self.assertEqual(fn("\\folder\\test\\"), '///folder/test/')
# UNC paths: UNC server becomes URL authority
self.assertEqual(fn("\\\\folder\\test\\"), '//folder/test/')
self.assertEqual(fn("\\\\\\folder\\test\\"), '///folder/test/')
self.assertEqual(fn('\\\\some\\share\\'), '//some/share/')
Expand All @@ -1551,9 +1553,10 @@ def test_pathname2url_win(self):
'test specific to POSIX pathnames')
def test_pathname2url_posix(self):
fn = urllib.request.pathname2url
self.assertEqual(fn('/'), '/')
self.assertEqual(fn('/a/b.c'), '/a/b.c')
self.assertEqual(fn('/a/b%#c'), '/a/b%25%23c')
# Absolute paths: use zero-length authority.
self.assertEqual(fn('/'), '///')
self.assertEqual(fn('/a/b.c'), '///a/b.c')
self.assertEqual(fn('/a/b%#c'), '///a/b%25%23c')

@unittest.skipUnless(sys.platform == 'win32',
'test specific to Windows pathnames.')
Expand All @@ -1563,29 +1566,29 @@ def test_url2pathname_win(self):
self.assertEqual(fn("///C|"), 'C:')
self.assertEqual(fn("///C:"), 'C:')
self.assertEqual(fn('///C:/'), 'C:\\')
self.assertEqual(fn('/C|//'), 'C:\\')
self.assertEqual(fn('/C|//'), 'C:\\\\')
self.assertEqual(fn('///C|/path'), 'C:\\path')
# No DOS drive
self.assertEqual(fn("///C/test/"), '\\\\\\C\\test\\')
self.assertEqual(fn("///C/test/"), '\\C\\test\\')
self.assertEqual(fn("////C/test/"), '\\\\C\\test\\')
# DOS drive paths
self.assertEqual(fn('C:/path/to/file'), 'C:\\path\\to\\file')
# DOS drive paths: see RFC 8089 (D.2.)
self.assertEqual(fn('file:C:/path/to/file'), 'C:\\path\\to\\file')
self.assertEqual(fn('C|/path/to/file'), 'C:\\path\\to\\file')
self.assertEqual(fn('/C|/path/to/file'), 'C:\\path\\to\\file')
self.assertEqual(fn('///C|/path/to/file'), 'C:\\path\\to\\file')
self.assertEqual(fn("///C|/foo/bar/spam.foo"), 'C:\\foo\\bar\\spam.foo')
# Non-ASCII drive letter
self.assertRaises(IOError, fn, "///\u00e8|/")
# UNC paths
# Non-ASCII drive letter: treat as real DOS drive, like ntpath.isabs()/splitroot()
self.assertEqual(fn("///\u00e8|/"), "\u00e8:\\")
# UNC paths: see RFC 8089 (E.3.)
self.assertEqual(fn('//server/path/to/file'), '\\\\server\\path\\to\\file')
self.assertEqual(fn('////server/path/to/file'), '\\\\server\\path\\to\\file')
self.assertEqual(fn('/////server/path/to/file'), '\\\\\\server\\path\\to\\file')
# Localhost paths
self.assertEqual(fn('/////server/path/to/file'), '\\\\server\\path\\to\\file')
# Localhost paths: see RFC 8089 (2.)
self.assertEqual(fn('//localhost/C:/path/to/file'), 'C:\\path\\to\\file')
self.assertEqual(fn('//localhost/C|/path/to/file'), 'C:\\path\\to\\file')
# Round-tripping
paths = ['C:',
r'\\\C\test\\',
r'\C\test\\',
r'C:\foo\bar\spam.foo']
for path in paths:
self.assertEqual(fn(urllib.request.pathname2url(path)), path)
Expand All @@ -1595,10 +1598,12 @@ def test_url2pathname_win(self):
def test_url2pathname_posix(self):
fn = urllib.request.url2pathname
self.assertEqual(fn('/foo/bar'), '/foo/bar')
self.assertEqual(fn('//foo/bar'), '//foo/bar')
self.assertEqual(fn('///foo/bar'), '///foo/bar')
self.assertEqual(fn('////foo/bar'), '////foo/bar')
self.assertEqual(fn('//localhost/foo/bar'), '//localhost/foo/bar')
# URI from a machine called 'foo': should raise URLError
self.assertRaises(urllib.error.URLError, fn, '//foo/bar')
# URI with empty or local authority: discard authority section
self.assertEqual(fn('///foo/bar'), '/foo/bar')
self.assertEqual(fn('////foo/bar'), '//foo/bar')
self.assertEqual(fn('//localhost/foo/bar'), '/foo/bar')

class Utility_Tests(unittest.TestCase):
"""Testcase to test the various utility functions in the urllib."""
Expand Down
19 changes: 8 additions & 11 deletions Lib/test/test_urllib2.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,6 @@ def test___all__(self):
context = {}
exec('from urllib.%s import *' % module, context)
del context['__builtins__']
if module == 'request' and os.name == 'nt':
u, p = context.pop('url2pathname'), context.pop('pathname2url')
self.assertEqual(u.__module__, 'nturl2path')
self.assertEqual(p.__module__, 'nturl2path')
for k, v in context.items():
self.assertEqual(v.__module__, 'urllib.%s' % module,
"%r is exposed in 'urllib.%s' but defined in %r" %
Expand Down Expand Up @@ -827,14 +823,15 @@ def test_file(self):
urls = [
"file://localhost%s" % urlpath,
"file://%s" % urlpath,
"file://%s%s" % (socket.gethostbyname('localhost'), urlpath),
]
try:
localaddr = socket.gethostbyname(socket.gethostname())
except socket.gaierror:
localaddr = ''
if localaddr:
urls.append("file://%s%s" % (localaddr, urlpath))
if os.name != 'nt':
urls.append("file://%s%s" % (socket.gethostbyname('localhost'), urlpath))
try:
localaddr = socket.gethostbyname(socket.gethostname())
except socket.gaierror:
localaddr = ''
if localaddr:
urls.append("file://%s%s" % (localaddr, urlpath))

for url in urls:
f = open(TESTFN, "wb")
Expand Down
Loading
Loading