This repository has been archived by the owner. It is now read-only.

Modified sanitize_url to accept IPv6 addresses #235

Closed
wants to merge 3 commits into
from
Jump to file or symbol
Failed to load files and symbols.
+27 −9
Split
@@ -356,7 +356,7 @@ def __call__(self, environ, start_response):
return self.app(environ, start_response)
class DomainListingMiddleware(object):
- domain_pattern = re.compile(r'\A/domain/(([-\w]+\.)+[\w]+)')
+ domain_pattern = re.compile(r'\A/domain/(([-\w]+\.)+[\w]+|\[[0-9a-fA-F:]+\])')
def __init__(self, app):
self.app = app
View
@@ -220,7 +220,7 @@ def base_url(url):
res = r_base_url.findall(url)
return (res and res[0]) or url
-r_domain = re.compile("(?i)(?:.+?://)?(?:www[\d]*\.)?([^/:#?]*)")
+r_domain = re.compile("(?i)(?:.+?://)?(?:www[\d]*\.)?(\[[0-9a-fA-F:]+\]|[^/:#?]*)")
def domain(s):
"""
Takes a URL and returns the domain part, minus www., if
@@ -266,7 +266,7 @@ def get_title(url):
return None
valid_schemes = ('http', 'https', 'ftp', 'mailto')
-valid_dns = re.compile('\A[-a-zA-Z0-9]+\Z')
+valid_dns = re.compile('\A[-a-zA-Z0-9:]+\Z')
def sanitize_url(url, require_scheme = False):
"""Validates that the url is of the form
@@ -379,7 +379,8 @@ class UrlParser(object):
__slots__ = ['scheme', 'path', 'params', 'query',
'fragment', 'username', 'password', 'hostname',
- 'port', '_url_updates', '_orig_url', '_query_dict']
+ 'port', '_url_updates', '_orig_url', '_query_dict',
+ 'is_ipv6']
valid_schemes = ('http', 'https', 'ftp', 'mailto')
cname_get = "cnameframe"
@@ -389,6 +390,9 @@ def __init__(self, url):
for s in self.__slots__:
if hasattr(u, s):
setattr(self, s, getattr(u, s))
+ self.is_ipv6 = False
+ if getattr(u, 'netloc', '').startswith('['):
+ self.is_ipv6 = True
self._url_updates = {}
self._orig_url = url
self._query_dict = None
@@ -459,8 +463,19 @@ def unparse(self):
q.update(self._url_updates)
q = query_string(q).lstrip('?')
- # make sure the port is not doubly specified
- if self.port and ":" in self.hostname:
+ # if this is an IPv6 address, remove the brackets from the hostname
+ if self.hostname and self.hostname.startswith('[') and ']' in self.hostname:
+ self.is_ipv6 = True
+ self.hostname = self.hostname[1:]
+ self.hostname = self.hostname[:self.hostname.index(']')]
+
+ # if this is marked as an IPv6 address but the hostname contains anything other than hex digits and colons, clear the flag
+ if self.hostname and self.is_ipv6:
+ if not all(c in '0123456789abcdefABCDEF:' for c in self.hostname):
+ self.is_ipv6 = False
+
+ # make sure the port is not doubly specified
+ if self.hostname and ':' in self.hostname and self.port and not self.is_ipv6:
self.hostname = self.hostname.split(':')[0]
# if there is a netloc, there had better be a scheme
@@ -539,7 +554,10 @@ def netloc(self):
if not self.hostname:
return ""
elif getattr(self, "port", None):
- return self.hostname + ":" + str(self.port)
+ if self.is_ipv6:
+ return "[" + self.hostname + "]:" + str(self.port)
+ else:
+ return self.hostname + ":" + str(self.port)
return self.hostname
def mk_cname(self, require_frame = True, subreddit = None, port = None):
@@ -948,8 +966,8 @@ def new_fn(*a,**kw):
def common_subdomain(domain1, domain2):
if not domain1 or not domain2:
return ""
- domain1 = domain1.split(":")[0]
- domain2 = domain2.split(":")[0]
+ domain1 = urlparse(domain1).hostname
+ domain2 = urlparse(domain2).hostname
if len(domain1) > len(domain2):
domain1, domain2 = domain2, domain1