Skip to content

Commit

Permalink
Merge pull request #12 from sonique6784/feature/empty_url_and_doubles…
Browse files Browse the repository at this point in the history
…lashurl

prevent exception when empty string + support for URL starting with double slash
  • Loading branch information
niksite committed Oct 31, 2018
2 parents 8cfc969 + f7d0d7c commit 59977b5
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 1 deletion.
4 changes: 4 additions & 0 deletions tests/test_url_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from url_normalize import url_normalize

EXPECTED_RESULTS = {
'':
'', # empty string
'/foo/bar/.':
'/foo/bar/',
'/foo/bar/./':
Expand Down Expand Up @@ -49,6 +51,8 @@
'/foo/',
'/foo///bar//':
'/foo/bar/',
'//www.foo.com/':
'https://www.foo.com/',
'http://www.foo.com:80/foo':
'http://www.foo.com/foo',
'http://www.foo.com:8000/foo':
Expand Down
12 changes: 11 additions & 1 deletion url_normalize/url_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
This fork author: Nikolay Panov (<pythoneer@npanov.com>)
History:
* 28 Oct 2018: Support empty string and double slash urls (//domain.tld/foo.html)
* 07 Jul 2017: Same code support both Python 3 and Python 2.
* 05 Jan 2016: Python 3 compatibility, please use version 1.2 on python 2
* 29 Dec 2015: PEP8, setup.py
Expand All @@ -36,7 +37,7 @@
from urllib.parse import quote, unquote, urlsplit, urlunsplit

__license__ = "Python"
__version__ = "1.3.3"
__version__ = "1.3.4"


def _clean(string, charset='utf-8'):
Expand Down Expand Up @@ -83,10 +84,19 @@ def url_normalize(url, charset='utf-8'):
charset : string : The target charset for the URL if the url was
given as unicode string.
"""

# empty or None url: nothing to normalize, return it unchanged
if url is None or len(url) == 0:
return url

# if there is no scheme use http as default scheme
if url[0] not in ['/', '-'] and ':' not in url[:7]:
url = 'http://' + url

# protocol-relative url (starts with //, valid for http or https): prepend https
if len(url) > 2 and url[0] == '/' and url[1] == '/' and ':' not in url[:7]:
url = 'https:' + url

# shebang urls support
url = url.replace('#!', '?_escaped_fragment_=')

Expand Down

0 comments on commit 59977b5

Please sign in to comment.