From b02410c913290abf3fa3755b8f92c57560e0fe42 Mon Sep 17 00:00:00 2001
From: Steve Dower
Date: Tue, 4 Jun 2019 09:02:24 -0700
Subject: [PATCH 1/2] bpo-36742: Corrects fix to handle decomposition in usernames

---
 Lib/test/test_urlparse.py | 15 ++++++++-------
 Lib/urlparse.py           |  6 +++---
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 6fd1071bf7cdec..bc7f8625030340 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -647,13 +647,14 @@ def test_urlsplit_normalization(self):
             with self.assertRaises(ValueError):
                 urlparse.urlsplit(u'http://\u30d5\u309a\ufe1380')
 
-        for scheme in [u"http", u"https", u"ftp"]:
-            for c in denorm_chars:
-                url = u"{}://netloc{}false.netloc/path".format(scheme, c)
-                if test_support.verbose:
-                    print "Checking %r" % url
-                with self.assertRaises(ValueError):
-                    urlparse.urlsplit(url)
+        for scheme in ["http", "https", "ftp"]:
+            for netloc in ["netloc{}false.netloc", "n{}user@netloc"]:
+                for c in denorm_chars:
+                    url = "{}://{}/path".format(scheme, netloc.format(c))
+                    if test_support.verbose:
+                        print "Checking %r" % url
+                    with self.assertRaises(ValueError):
+                        urlparse.urlsplit(url)
 
 def test_main():
     test_support.run_unittest(UrlParseTestCase)
diff --git a/Lib/urlparse.py b/Lib/urlparse.py
index f08e0fe58432ce..41a6c37eec8b0d 100644
--- a/Lib/urlparse.py
+++ b/Lib/urlparse.py
@@ -171,9 +171,9 @@ def _checknetloc(netloc):
     # looking for characters like \u2100 that expand to 'a/c'
     # IDNA uses NFKC equivalence, so normalize for this check
     import unicodedata
-    n = netloc.rpartition('@')[2] # ignore anything to the left of '@'
-    n = n.replace(':', '')        # ignore characters already included
-    n = n.replace('#', '')        # but not the surrounding text
+    n = netloc.replace('@', '')   # ignore characters already included
+    n = n.replace(':', '')        # but not the surrounding text
+    n = n.replace('#', '')
     n = n.replace('?', '')
     netloc2 = unicodedata.normalize('NFKC', n)
     if n == netloc2:

From 07392c7584605d5952b100701c5270185cb0f21e Mon Sep 17 00:00:00 2001
From: Steve Dower
Date: Tue, 4 Jun 2019 09:29:16 -0700
Subject: [PATCH 2/2] Replace accidentally removed u prefixes

---
 Lib/test/test_urlparse.py |  6 +++---
 Lib/urlparse.py           | 12 ++++++------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index bc7f8625030340..857ed96d92fe2d 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -647,10 +647,10 @@ def test_urlsplit_normalization(self):
             with self.assertRaises(ValueError):
                 urlparse.urlsplit(u'http://\u30d5\u309a\ufe1380')
 
-        for scheme in ["http", "https", "ftp"]:
-            for netloc in ["netloc{}false.netloc", "n{}user@netloc"]:
+        for scheme in [u"http", u"https", u"ftp"]:
+            for netloc in [u"netloc{}false.netloc", u"n{}user@netloc"]:
                 for c in denorm_chars:
-                    url = "{}://{}/path".format(scheme, netloc.format(c))
+                    url = u"{}://{}/path".format(scheme, netloc.format(c))
                     if test_support.verbose:
                         print "Checking %r" % url
                     with self.assertRaises(ValueError):
diff --git a/Lib/urlparse.py b/Lib/urlparse.py
index 41a6c37eec8b0d..6834f3c1798b09 100644
--- a/Lib/urlparse.py
+++ b/Lib/urlparse.py
@@ -171,17 +171,17 @@ def _checknetloc(netloc):
     # looking for characters like \u2100 that expand to 'a/c'
     # IDNA uses NFKC equivalence, so normalize for this check
     import unicodedata
-    n = netloc.replace('@', '')   # ignore characters already included
-    n = n.replace(':', '')        # but not the surrounding text
-    n = n.replace('#', '')
-    n = n.replace('?', '')
+    n = netloc.replace(u'@', u'') # ignore characters already included
+    n = n.replace(u':', u'')      # but not the surrounding text
+    n = n.replace(u'#', u'')
+    n = n.replace(u'?', u'')
     netloc2 = unicodedata.normalize('NFKC', n)
     if n == netloc2:
         return
     for c in '/?#@:':
         if c in netloc2:
-            raise ValueError("netloc '" + netloc + "' contains invalid " +
-                             "characters under NFKC normalization")
+            raise ValueError(u"netloc '" + netloc + u"' contains invalid " +
+                             u"characters under NFKC normalization")
 
 def urlsplit(url, scheme='', allow_fragments=True):
     """Parse a URL into 5 components:
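
Note (not part of the patch above): a minimal sketch of what the first commit changes and why. It assumes Python 2 with the patched urlparse importable; the netloc value is a made-up example. The earlier fix discarded everything left of '@' with rpartition, so a character that decomposes under NFKC could hide in the username portion of the netloc; the corrected check only strips the separator characters themselves, so the username is still normalized and inspected.

    # Illustrative sketch only, not part of the patch. Assumes Python 2 with
    # the patched urlparse importable; the netloc below is a made-up example.
    import unicodedata
    import urlparse

    netloc = u"n\u2100user@netloc"  # \u2100 (ACCOUNT OF) decomposes to u'a/c' under NFKC

    # Earlier fix: rpartition('@') threw away the username, so the decomposing
    # character was never normalized and the check passed.
    old_n = netloc.rpartition(u'@')[2]
    print repr(unicodedata.normalize('NFKC', old_n))  # u'netloc' -- unchanged

    # Corrected fix: only the separator characters themselves are removed, so
    # the username is still normalized and the hidden '/' shows up.
    new_n = netloc.replace(u'@', u'').replace(u':', u'')
    new_n = new_n.replace(u'#', u'').replace(u'?', u'')
    print repr(unicodedata.normalize('NFKC', new_n))  # u'na/cusernetloc'

    # With the patch applied, urlsplit() rejects such a URL outright.
    try:
        urlparse.urlsplit(u"http://" + netloc + u"/path")
    except ValueError as exc:
        print exc

The u'' prefixes restored by the second commit keep these replacements operating on unicode objects rather than mixing in byte strings, matching the unicode netloc that _checknetloc compares against its NFKC-normalized form.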