From e3a81cba0b0e1b169ba2fb43035190c3923eb108 Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 31 Oct 2024 00:03:26 +0000 Subject: [PATCH 1/5] GH-126212: Fix removal of slashes in file URIs on Windows Adjust `urllib.request.pathname2url()` and `url2pathname()` so that they don't remove slashes from Windows DOS drive paths and URLs. There was no basis for this behaviour, and it conflicts with how UNC and POSIX paths are handled. --- Lib/nturl2path.py | 24 +++++-------------- Lib/test/test_urllib.py | 8 +++++-- ...-10-30-23-59-36.gh-issue-126212._9uYjT.rst | 3 +++ 3 files changed, 15 insertions(+), 20 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-30-23-59-36.gh-issue-126212._9uYjT.rst diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py index 6453f202c26d14..a70bd30fbf390a 100644 --- a/Lib/nturl2path.py +++ b/Lib/nturl2path.py @@ -24,22 +24,14 @@ def url2pathname(url): # convert this to \\host\path\on\remote\host # (notice halving of slashes at the start of the path) url = url[2:] - components = url.split('/') - # make sure not to convert quoted slashes :-) - return urllib.parse.unquote('\\'.join(components)) + return urllib.parse.unquote(url.replace('/', '\\')) comp = url.split('|') if len(comp) != 2 or comp[0][-1] not in string.ascii_letters: error = 'Bad URL: ' + url raise OSError(error) drive = comp[0][-1].upper() - components = comp[1].split('/') - path = drive + ':' - for comp in components: - if comp: - path = path + '\\' + urllib.parse.unquote(comp) - # Issue #11474 - handing url such as |c/| - if path.endswith(':') and url.endswith('/'): - path += '\\' + tail = comp[1].replace('/', '\\') + path = drive + ':' + urllib.parse.unquote(tail) return path def pathname2url(p): @@ -60,17 +52,13 @@ def pathname2url(p): raise OSError('Bad path: ' + p) if not ':' in p: # No drive specifier, just convert slashes and quote the name - components = p.split('\\') - return urllib.parse.quote('/'.join(components)) + return urllib.parse.quote(p.replace('\\', '/')) comp = p.split(':', maxsplit=2) if len(comp) != 2 or len(comp[0]) > 1: error = 'Bad path: ' + p raise OSError(error) drive = urllib.parse.quote(comp[0].upper()) - components = comp[1].split('\\') - path = '///' + drive + ':' - for comp in components: - if comp: - path = path + '/' + urllib.parse.quote(comp) + tail = comp[1].replace('\\', '/') + path = '///' + drive + ':' + urllib.parse.quote(tail) return path diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 3ee17f96b817e1..57227265fb6b25 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -1526,8 +1526,10 @@ def test_pathname2url_win(self): self.assertEqual(fn('\\\\?\\C:\\dir'), '///C:/dir') self.assertEqual(fn('\\\\?\\unc\\server\\share\\dir'), '//server/share/dir') self.assertEqual(fn("C:"), '///C:') - self.assertEqual(fn("C:\\"), '///C:') + self.assertEqual(fn("C:\\"), '///C:/') self.assertEqual(fn('C:\\a\\b.c'), '///C:/a/b.c') + self.assertEqual(fn('C:\\a\\b.c\\'), '///C:/a/b.c/') + self.assertEqual(fn('C:\\a\\\\b.c'), '///C:/a//b.c') self.assertEqual(fn('C:\\a\\b%#c'), '///C:/a/b%25%23c') self.assertEqual(fn('C:\\a\\b\xe9'), '///C:/a/b%C3%A9') self.assertEqual(fn('C:\\foo\\bar\\spam.foo'), "///C:/foo/bar/spam.foo") @@ -1563,13 +1565,15 @@ def test_url2pathname_win(self): self.assertEqual(fn("///C|"), 'C:') self.assertEqual(fn("///C:"), 'C:') self.assertEqual(fn('///C:/'), 'C:\\') - self.assertEqual(fn('/C|//'), 'C:\\') + self.assertEqual(fn('/C|//'), 'C:\\\\') self.assertEqual(fn('///C|/path'), 'C:\\path') # No DOS drive self.assertEqual(fn("///C/test/"), '\\\\\\C\\test\\') self.assertEqual(fn("////C/test/"), '\\\\C\\test\\') # DOS drive paths self.assertEqual(fn('C:/path/to/file'), 'C:\\path\\to\\file') + self.assertEqual(fn('C:/path/to/file/'), 'C:\\path\\to\\file\\') + self.assertEqual(fn('C:/path/to//file'), 'C:\\path\\to\\\\file') self.assertEqual(fn('C|/path/to/file'), 'C:\\path\\to\\file') self.assertEqual(fn('/C|/path/to/file'), 'C:\\path\\to\\file') self.assertEqual(fn('///C|/path/to/file'), 'C:\\path\\to\\file') diff --git a/Misc/NEWS.d/next/Library/2024-10-30-23-59-36.gh-issue-126212._9uYjT.rst b/Misc/NEWS.d/next/Library/2024-10-30-23-59-36.gh-issue-126212._9uYjT.rst new file mode 100644 index 00000000000000..047fe0f68048b5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-30-23-59-36.gh-issue-126212._9uYjT.rst @@ -0,0 +1,3 @@ +Fix issue where :func:`urllib.request.pathname2url` and +:func:`~urllib.request.url2pathname` removed slashes from Windows DOS drive +paths and URLs. From 7973aac8ff43abb1999a2f0c8531e75b60712235 Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 31 Oct 2024 18:40:04 +0000 Subject: [PATCH 2/5] url2pathname(): unquote before converting separators --- Lib/nturl2path.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py index a70bd30fbf390a..306e9760d75b40 100644 --- a/Lib/nturl2path.py +++ b/Lib/nturl2path.py @@ -24,14 +24,14 @@ def url2pathname(url): # convert this to \\host\path\on\remote\host # (notice halving of slashes at the start of the path) url = url[2:] - return urllib.parse.unquote(url.replace('/', '\\')) + return urllib.parse.unquote(url).replace('/', '\\') comp = url.split('|') if len(comp) != 2 or comp[0][-1] not in string.ascii_letters: error = 'Bad URL: ' + url raise OSError(error) drive = comp[0][-1].upper() - tail = comp[1].replace('/', '\\') - path = drive + ':' + urllib.parse.unquote(tail) + tail = urllib.parse.unquote(comp[1]) + path = drive + ':' + tail.replace('/', '\\') return path def pathname2url(p): From 8d313bf005d3fffb6e0c9bbe2b12fde75c21a55b Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 31 Oct 2024 22:45:38 +0000 Subject: [PATCH 3/5] Beautify code --- Lib/nturl2path.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py index 306e9760d75b40..b373105167d74d 100644 --- a/Lib/nturl2path.py +++ b/Lib/nturl2path.py @@ -30,9 +30,8 @@ def url2pathname(url): error = 'Bad URL: ' + url raise OSError(error) drive = comp[0][-1].upper() - tail = urllib.parse.unquote(comp[1]) - path = drive + ':' + tail.replace('/', '\\') - return path + tail = urllib.parse.unquote(comp[1]).replace('/', '\\') + return drive + ':' + tail def pathname2url(p): """OS-specific conversion from a file system path to a relative URL @@ -59,6 +58,5 @@ def pathname2url(p): raise OSError(error) drive = urllib.parse.quote(comp[0].upper()) - tail = comp[1].replace('\\', '/') - path = '///' + drive + ':' + urllib.parse.quote(tail) - return path + tail = urllib.parse.quote(comp[1].replace('\\', '/')) + return '///' + drive + ':' + tail From d3687bb233316f66be8b4d83bb2b5d415be6792a Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 31 Oct 2024 23:21:55 +0000 Subject: [PATCH 4/5] Preserve percent-encoded forward slashes. --- Lib/nturl2path.py | 4 ++-- Lib/test/test_urllib.py | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py index b373105167d74d..054525fa7d6442 100644 --- a/Lib/nturl2path.py +++ b/Lib/nturl2path.py @@ -24,13 +24,13 @@ def url2pathname(url): # convert this to \\host\path\on\remote\host # (notice halving of slashes at the start of the path) url = url[2:] - return urllib.parse.unquote(url).replace('/', '\\') + return urllib.parse.unquote(url.replace('/', '\\')) comp = url.split('|') if len(comp) != 2 or comp[0][-1] not in string.ascii_letters: error = 'Bad URL: ' + url raise OSError(error) drive = comp[0][-1].upper() - tail = urllib.parse.unquote(comp[1]).replace('/', '\\') + tail = urllib.parse.unquote(comp[1].replace('/', '\\')) return drive + ':' + tail def pathname2url(p): diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 57227265fb6b25..28369b21db06d4 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -1587,6 +1587,9 @@ def test_url2pathname_win(self): # Localhost paths self.assertEqual(fn('//localhost/C:/path/to/file'), 'C:\\path\\to\\file') self.assertEqual(fn('//localhost/C|/path/to/file'), 'C:\\path\\to\\file') + # Percent-encoded forward slashes are preserved for backwards compatibility + self.assertEqual(fn('C:/foo%2fbar'), 'C:\\foo/bar') + self.assertEqual(fn('//server/share/foo%2fbar'), '\\\\server\\share\\foo/bar') # Round-tripping paths = ['C:', r'\\\C\test\\', From 53094156832fa91fa9a11520fe3a5a637f50be81 Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 31 Oct 2024 23:23:07 +0000 Subject: [PATCH 5/5] Restore comment :-) --- Lib/nturl2path.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py index 054525fa7d6442..2f9fec7893afd1 100644 --- a/Lib/nturl2path.py +++ b/Lib/nturl2path.py @@ -24,6 +24,7 @@ def url2pathname(url): # convert this to \\host\path\on\remote\host # (notice halving of slashes at the start of the path) url = url[2:] + # make sure not to convert quoted slashes :-) return urllib.parse.unquote(url.replace('/', '\\')) comp = url.split('|') if len(comp) != 2 or comp[0][-1] not in string.ascii_letters: