Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Fix some problems with IPv6 addresses in utils

  • Loading branch information...
commit 6ea6a6dc1381bef813cf597cd48e493607fb08c5 1 parent b713eca
k21 authored December 04, 2011

Showing 1 changed file with 25 additions and 7 deletions. Show diff stats Hide diff stats

  1. 32  r2/r2/lib/utils/utils.py
32  r2/r2/lib/utils/utils.py
@@ -220,7 +220,7 @@ def base_url(url):
220 220
     res = r_base_url.findall(url)
221 221
     return (res and res[0]) or url
222 222
 
223  
-r_domain = re.compile("(?i)(?:.+?://)?(?:www[\d]*\.)?([^/:#?]*)")
  223
+r_domain = re.compile("(?i)(?:.+?://)?(?:www[\d]*\.)?(\[[0-9a-fA-F:]+\]|[^/:#?]*)")
224 224
 def domain(s):
225 225
     """
226 226
         Takes a URL and returns the domain part, minus www., if
@@ -379,7 +379,8 @@ class UrlParser(object):
379 379
 
380 380
     __slots__ = ['scheme', 'path', 'params', 'query',
381 381
                  'fragment', 'username', 'password', 'hostname',
382  
-                 'port', '_url_updates', '_orig_url', '_query_dict']
  382
+                 'port', '_url_updates', '_orig_url', '_query_dict',
  383
+                 'is_ipv6']
383 384
 
384 385
     valid_schemes = ('http', 'https', 'ftp', 'mailto')
385 386
     cname_get = "cnameframe"
@@ -389,6 +390,9 @@ def __init__(self, url):
389 390
         for s in self.__slots__:
390 391
             if hasattr(u, s):
391 392
                 setattr(self, s, getattr(u, s))
  393
+        self.is_ipv6 = False
  394
+        if getattr(u, 'netloc', '').startswith('['):
  395
+            self.is_ipv6 = True
392 396
         self._url_updates = {}
393 397
         self._orig_url    = url
394 398
         self._query_dict  = None
@@ -459,8 +463,19 @@ def unparse(self):
459 463
             q.update(self._url_updates)
460 464
             q = query_string(q).lstrip('?')
461 465
 
462  
-        # make sure the port is not doubly specified 
463  
-        if self.port and ":" in self.hostname:
  466
+        # if this is ipv6 address, remove brackets from hostname
  467
+        if self.hostname and self.hostname.startswith('[') and ']' in self.hostname:
  468
+            self.is_ipv6 = True
  469
+            self.hostname = self.hostname[1:]
  470
+            self.hostname = self.hostname[:self.hostname.index(']')]
  471
+
  472
+        # if this is marked as ipv6 address but it is not, remove the mark
  473
+        if self.hostname and self.is_ipv6:
  474
+            if not all(c in '0123456789abcdefABCDEF:' for c in self.hostname):
  475
+                self.is_ipv6 = False
  476
+
  477
+        # make sure the port is not doubly specified
  478
+        if self.hostname and ':' in self.hostname and self.port and not self.is_ipv6:
464 479
             self.hostname = self.hostname.split(':')[0]
465 480
 
466 481
         # if there is a netloc, there had better be a scheme
@@ -539,7 +554,10 @@ def netloc(self):
539 554
         if not self.hostname:
540 555
             return ""
541 556
         elif getattr(self, "port", None):
542  
-            return self.hostname + ":" + str(self.port)
  557
+            if self.is_ipv6:
  558
+                return "[" + self.hostname + "]:" + str(self.port)
  559
+            else:
  560
+                return self.hostname + ":" + str(self.port)
543 561
         return self.hostname
544 562
 
545 563
     def mk_cname(self, require_frame = True, subreddit = None, port = None):
@@ -948,8 +966,8 @@ def new_fn(*a,**kw):
948 966
 def common_subdomain(domain1, domain2):
949 967
     if not domain1 or not domain2:
950 968
         return ""
951  
-    domain1 = domain1.split(":")[0]
952  
-    domain2 = domain2.split(":")[0]
  969
+    domain1 = urlparse(domain1).hostname
  970
+    domain2 = urlparse(domain2).hostname
953 971
     if len(domain1) > len(domain2):
954 972
         domain1, domain2 = domain2, domain1
955 973
 

8 notes on commit 6ea6a6d

danry25

Looks like this patch breaks submissions in the current iteration of Reddit from Source :(

k21
Owner

Yes, it has been a long time since this patch was written. I do not have a reddit locally installed right now and because the patch applied cleanly, I do not know what it breaks. Could you please be a bit more specific? Thanks.

danry25

Well, let me try to set it up on our latest reddit install; we have migrated from reddit from source on Ubuntu 11.10 to reddit from source on Ubuntu 12.04.1 since I last tried this patch. Which files do I need to change, by the way?

k21
Owner

If you are going to try applying this patch, it might be a good idea to use commit @2cfd44f instead of this one, which also fixes problems with /domain listings.

Only the following files were modified in this patch:
r2/r2/config/middleware.py
r2/r2/lib/utils/utils.py

danry25

Thanks for the info. I'll try applying the patch from the commit you recommended here in a minute and see how it goes.

danry25

Interesting: it appears to have no effect if I insert the changes you made into the latest reddit build's r2/r2/config/middleware.py
and r2/r2/lib/utils/utils.py files, although it doesn't break normal link submission. Submitting an IPv6 URL gets reddit to reply with "you should check that url". Maybe reddit is relying on more values to screen URLs now?

k21
Owner

I will try to get my local reddit installation running again and find out what the problem is, but I do not have a lot of free time, so I cannot guarantee when (whether) it will be fixed.

danry25

Oh, don't worry about it too much, I'm not particularly pressed to get this added into my reddit install. I'll clone your repository though on a different VPS & see if it works with IPv6 links.

Please sign in to comment.
Something went wrong with that request. Please try again.