From 54a97534c173a8008ec24f974434eee9352fdbcc Mon Sep 17 00:00:00 2001 From: Senthil Kumaran Date: Mon, 3 Apr 2017 21:54:38 -0700 Subject: [PATCH 1/4] urllib.parse clarify '' in scheme values. --- Lib/urllib/parse.py | 47 +++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 5d331596aec247..f047201aea6e8c 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -3,25 +3,26 @@ urlparse module is based upon the following RFC specifications. RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding -and L. Masinter, January 2005. +and L. Masinter, January 2005. RFC 2732 : "Format for Literal IPv6 Addresses in URL's by R.Hinden, B.Carpenter -and L.Masinter, December 1999. +and L. Masinter, December 1999. RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T. Berners-Lee, R. Fielding, and L. Masinter, August 1998. -RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zawinski, July 1998. +RFC 2368: "The mailto URL scheme", by P.Hoffman, L Masinter, J. Zawinski, +July 1998. -RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June -1995. +RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, +June 1995. -RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M. -McCahill, December 1994 +RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, +M. McCahill, December 1994 RFC 3986 is considered the current standard and any future changes to urlparse module should conform with it. The urlparse module is -currently not entirely compliant with this RFC due to defacto +currently not entirely compliant with this RFC due to de-facto scenarios for parsing, and for backward compatibility purposes, some parsing quirks from older RFCs are retained. The testcases in test_urlparse.py provides a good indicator of parsing behavior. @@ -38,29 +39,37 @@ "DefragResult", "ParseResult", "SplitResult", "DefragResultBytes", "ParseResultBytes", "SplitResultBytes"] -# A classification of schemes ('' means apply by default) -uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap', +# A classification of schemes. +# We have '' as scheme value to accommodate for the default value of scheme +# arg in urlsplit and urlparse. + +uses_relative = ['', 'ftp', 'http', 'gopher', 'nntp', 'imap', 'wais', 'file', 'https', 'shttp', 'mms', - 'prospero', 'rtsp', 'rtspu', '', 'sftp', + 'prospero', 'rtsp', 'rtspu', 'sftp', 'svn', 'svn+ssh', 'ws', 'wss'] -uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', + +uses_netloc = ['', 'ftp', 'http', 'gopher', 'nntp', 'telnet', 'imap', 'wais', 'file', 'mms', 'https', 'shttp', - 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', + 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', 'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh', 'ws', 'wss'] -uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap', + +uses_params = ['', 'ftp', 'hdl', 'prospero', 'http', 'imap', 'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips', - 'mms', '', 'sftp', 'tel'] + 'mms', 'sftp', 'tel'] # These are not actually used anymore, but should stay for backwards # compatibility. (They are undocumented, but have a public-looking name.) + non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', 'telnet', 'wais', 'imap', 'snews', 'sip', 'sips'] -uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms', - 'gopher', 'rtsp', 'rtspu', 'sip', 'sips', ''] -uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', + +uses_query = ['', 'http', 'wais', 'imap', 'https', 'shttp', 'mms', + 'gopher', 'rtsp', 'rtspu', 'sip', 'sips'] + +uses_fragment = ['', 'ftp', 'hdl', 'http', 'gopher', 'news', 'nntp', 'wais', 'https', 'shttp', 'snews', - 'file', 'prospero', ''] + 'file', 'prospero'] # Characters valid in scheme names scheme_chars = ('abcdefghijklmnopqrstuvwxyz' From 8428a71fc132fb45828da0cb7bdc3c8e9a0b66fa Mon Sep 17 00:00:00 2001 From: Senthil Kumaran Date: Fri, 7 Apr 2017 18:44:34 -0700 Subject: [PATCH 2/4] Address Review comments. --- Lib/urllib/parse.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index f047201aea6e8c..575380c0c9fdfe 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -22,7 +22,7 @@ RFC 3986 is considered the current standard and any future changes to urlparse module should conform with it. The urlparse module is -currently not entirely compliant with this RFC due to de-facto +currently not entirely compliant with this RFC due to de facto scenarios for parsing, and for backward compatibility purposes, some parsing quirks from older RFCs are retained. The testcases in test_urlparse.py provides a good indicator of parsing behavior. @@ -40,8 +40,7 @@ "DefragResultBytes", "ParseResultBytes", "SplitResultBytes"] # A classification of schemes. -# We have '' as scheme value to accommodate for the default value of scheme -# arg in urlsplit and urlparse. +# We use '' as the scheme value for default scheme in urlsplit and urlparse. uses_relative = ['', 'ftp', 'http', 'gopher', 'nntp', 'imap', 'wais', 'file', 'https', 'shttp', 'mms', From d18aaef75271b5c40cc00163405cfd55da7e4685 Mon Sep 17 00:00:00 2001 From: Senthil Kumaran Date: Sun, 9 Apr 2017 18:45:33 -0700 Subject: [PATCH 3/4] Address review comment. --- Lib/urllib/parse.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 575380c0c9fdfe..b93762cae06533 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -40,7 +40,8 @@ "DefragResultBytes", "ParseResultBytes", "SplitResultBytes"] # A classification of schemes. -# We use '' as the scheme value for default scheme in urlsplit and urlparse. +# The empty string classifies URLs with no scheme specified, +# being the default value returned by “urlsplit” and “urlparse”. uses_relative = ['', 'ftp', 'http', 'gopher', 'nntp', 'imap', 'wais', 'file', 'https', 'shttp', 'mms', From dadfa9d6c1d303acb76b76dcced9ec2fc61634d3 Mon Sep 17 00:00:00 2001 From: Senthil Kumaran Date: Wed, 17 May 2017 19:10:51 -0700 Subject: [PATCH 4/4] Removing the docstring changes. --- Lib/urllib/parse.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index b93762cae06533..1af2906e36bfdd 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -3,26 +3,25 @@ urlparse module is based upon the following RFC specifications. RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding -and L. Masinter, January 2005. +and L. Masinter, January 2005. RFC 2732 : "Format for Literal IPv6 Addresses in URL's by R.Hinden, B.Carpenter -and L. Masinter, December 1999. +and L.Masinter, December 1999. RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T. Berners-Lee, R. Fielding, and L. Masinter, August 1998. -RFC 2368: "The mailto URL scheme", by P.Hoffman, L Masinter, J. Zawinski, -July 1998. +RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zawinski, July 1998. -RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, -June 1995. +RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June +1995. -RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, -M. McCahill, December 1994 +RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M. +McCahill, December 1994 RFC 3986 is considered the current standard and any future changes to urlparse module should conform with it. The urlparse module is -currently not entirely compliant with this RFC due to de facto +currently not entirely compliant with this RFC due to defacto scenarios for parsing, and for backward compatibility purposes, some parsing quirks from older RFCs are retained. The testcases in test_urlparse.py provides a good indicator of parsing behavior.