Skip to content

Commit

Permalink
bpo-27657: Fix urlparse() with numeric paths (GH-661)
Browse files Browse the repository at this point in the history
* bpo-27657: Fix urlparse() with numeric paths

Revert the parsing decision from bpo-754016 in favor of the documented
consensus in bpo-16932 on how to treat strings that lack a // to
designate the netloc.

* bpo-22891: Remove urlsplit() optimization for 'http' prefixed inputs.
(cherry picked from commit 5a88d50)

Co-authored-by: Tim Graham <timograham@gmail.com>
  • Loading branch information
miss-islington and timgraham committed Oct 18, 2019
1 parent 1bceb0e commit 82b5f6b
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 25 deletions.
10 changes: 6 additions & 4 deletions Lib/test/test_urlparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -709,15 +709,17 @@ def test_withoutscheme(self):

def test_portseparator(self):
# Issue 754016 makes changes for port separator ':' from scheme separator
self.assertEqual(urllib.parse.urlparse("path:80"),
('','','path:80','','',''))
self.assertEqual(urllib.parse.urlparse("http:80"), ('http','','80','','',''))
self.assertEqual(urllib.parse.urlparse("https:80"), ('https','','80','','',''))
self.assertEqual(urllib.parse.urlparse("path:80"), ('path','','80','','',''))
self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
('http','www.python.org:80','','','',''))
# As usual, need to check bytes input as well
self.assertEqual(urllib.parse.urlparse(b"path:80"),
(b'',b'',b'path:80',b'',b'',b''))
self.assertEqual(urllib.parse.urlparse(b"http:80"), (b'http',b'',b'80',b'',b'',b''))
self.assertEqual(urllib.parse.urlparse(b"https:80"), (b'https',b'',b'80',b'',b'',b''))
self.assertEqual(urllib.parse.urlparse(b"path:80"), (b'path',b'',b'80',b'',b'',b''))
self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
Expand Down
22 changes: 1 addition & 21 deletions Lib/urllib/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,31 +426,11 @@ def urlsplit(url, scheme='', allow_fragments=True):
netloc = query = fragment = ''
i = url.find(':')
if i > 0:
if url[:i] == 'http': # optimize the common case
url = url[i+1:]
if url[:2] == '//':
netloc, url = _splitnetloc(url, 2)
if (('[' in netloc and ']' not in netloc) or
(']' in netloc and '[' not in netloc)):
raise ValueError("Invalid IPv6 URL")
if allow_fragments and '#' in url:
url, fragment = url.split('#', 1)
if '?' in url:
url, query = url.split('?', 1)
_checknetloc(netloc)
v = SplitResult('http', netloc, url, query, fragment)
_parse_cache[key] = v
return _coerce_result(v)
for c in url[:i]:
if c not in scheme_chars:
break
else:
# make sure "url" is not actually a port number (in which case
# "scheme" is really part of the path)
rest = url[i+1:]
if not rest or any(c not in '0123456789' for c in rest):
# not a port number
scheme, url = url[:i].lower(), rest
scheme, url = url[:i].lower(), url[i+1:]

if url[:2] == '//':
netloc, url = _splitnetloc(url, 2)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix urllib.parse.urlparse() with numeric paths. A string like "path:80" is now
parsed as a scheme ("path") and a path ("80") instead of as the path "path:80".

0 comments on commit 82b5f6b

Please sign in to comment.