Optimization of looking up Page by URL, reducing it to 2 or 3 queries…

… maximum.
spookylukey · Jan 17, 2012 · 2519472 · 2519472
1 parent f3481c0
commit 2519472
Show file tree

Hide file tree

Showing 3 changed files with 51 additions and 39 deletions.
diff --git a/fiber/context_processors.py b/fiber/context_processors.py
@@ -2,7 +2,6 @@
 
 from app_settings import EXCLUDE_URLS
 from models import Page
-from utils.urls import get_named_url_from_quoted_url, is_quoted_url
 
 
 def page_info(request):
@@ -20,44 +19,7 @@ def page_info(request):
             if re.search(exclude_url, request.path.lstrip('/')):
                 return context
 
-    """
-    Find Page that matches the requested URL.
-
-    First check if there is a Page whose `url` matches the requested URL.
-    """
-    try:
-        page = Page.objects.get(url__exact=url)
-    except Page.DoesNotExist:
-        pass
-
-    """
-    If no Page has been found, check a subset of Pages (whose `url` or
-    `relative_url` contain the rightmost part of the requested URL), to see
-    if their `get_absolute_url()` matches the requested URL entirely.
-    """
-    if not page:
-        last_url_part = url.rstrip('/').rsplit('/', 1)[-1]
-        if last_url_part:
-            page_candidates = Page.objects.exclude(url__exact='', ) \
-                .filter(url__icontains=last_url_part)
-            if page_candidates:
-                for page_candidate in page_candidates:
-                    if page_candidate.get_absolute_url() == url:
-                        page = page_candidate
-                        break
-
-    """
-    If no Page has been found, try to find a Page by matching the
-    requested URL with reversed `named_url`s.
-    """
-    if not page:
-        page_candidates = Page.objects.exclude(url__exact='')
-        if page_candidates:
-            for page_candidate in page_candidates:
-                if is_quoted_url(page_candidate.url):
-                    if get_named_url_from_quoted_url(page_candidate.url) == url:
-                        page = page_candidate
-                        break
+    page = Page.objects.get_by_url(url)
 
     """
     Block access to pages that the current user isn't supposed to see.

diff --git a/fiber/managers.py b/fiber/managers.py
@@ -6,6 +6,7 @@
 from mptt.managers import TreeManager
 
 from fiber import editor
+from fiber.utils.urls import get_named_url_from_quoted_url
 
 
 class ContentItemManager(models.Manager):
@@ -133,3 +134,51 @@ def link_parent_objects(self, pages):
             else:
                 p.parent = page_dict[p.parent_id]
         return pages
+
+    def get_by_url(self, url):
+        """
+        Retrieve the page that matches the given URL.
+
+        Tries an exact match on `url`, then pages whose `get_absolute_url()`
+        equals the URL, then pages whose quoted `url` is a named URL that
+        resolves to it. Returns None when nothing matches.
+        """
+        qs = self.get_query_set()
+
+        # First check if there is a Page whose `url` matches the requested URL.
+        try:
+            return qs.get(url__exact=url)
+        except self.model.DoesNotExist:
+            pass
+
+        # If no Page has been found, check a subset of Pages (whose `url`
+        # contains the rightmost part of the requested URL), to see if their
+        # `get_absolute_url()` matches the requested URL entirely.
+        last_url_part = url.rstrip('/').rsplit('/', 1)[-1]
+        if last_url_part:
+            page_candidates = qs.exclude(url__exact='', ) \
+                .filter(url__icontains=last_url_part)
+
+            # get_absolute_url() typically accesses .parent recursively, so we
+            # need all the ancestors of all the candidates. We can do this in
+            # two queries - one for candidates, one for ancestors:
+            route_pages = self.model.objects.none()
+            candidate_pks = set()
+            for candidate in page_candidates:
+                candidate_pks.add(candidate.pk)
+                route_pages = route_pages | qs.filter(lft__lte=candidate.lft,
+                                                      rght__gte=candidate.rght)
+            route_pages = self.link_parent_objects(route_pages)
+
+            # Use the candidates that have parent objects attached. Select
+            # them by pk so the case-insensitive DB match is not narrowed.
+            for page in route_pages:
+                if page.pk in candidate_pks and page.get_absolute_url() == url:
+                    return page
+
+        # If no Page has been found, try to find a Page by matching the
+        # requested URL with reversed `named_url`s.
+        for page in qs.filter(url__startswith='"', url__endswith='"'):
+            # Compare this page's own url (not a stale loop variable).
+            if get_named_url_from_quoted_url(page.url) == url:
+                return page
diff --git a/fiber/utils/urls.py b/fiber/utils/urls.py
@@ -9,6 +9,7 @@ def get_admin_change_url(instance):
     return reverse(named_url, args=(instance.pk,))
 
 
+# PageManager.get_by_url duplicates this logic for efficient DB use
 def is_quoted_url(quoted_url):
     return quoted_url.startswith('"') and quoted_url.endswith('"')