Skip to content

Commit

Permalink
Improve search performance; *requires reindexing*
Browse files Browse the repository at this point in the history
* Add a filter query to every user query so that only results from the
  appropriate model are returned
* Add new field `users_granted_permission` to `AssetIndex` and
  `CollectionIndex`
* Filter using that field to greatly reduce the number of results returned by
  Whoosh
  • Loading branch information
jnm committed Oct 10, 2016
1 parent b139a09 commit 1cae3d6
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 6 deletions.
31 changes: 25 additions & 6 deletions kpi/filters.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import haystack
from distutils.util import strtobool
from django.conf import settings
from django.contrib.auth.models import AnonymousUser
from django.core.exceptions import FieldError
from rest_framework import filters
from haystack.backends.whoosh_backend import WhooshSearchBackend
from whoosh.qparser import QueryParser
from haystack.query import SearchQuerySet
from haystack.constants import DJANGO_CT, ITERATOR_LOAD_PER_QUERY
from haystack.inputs import Raw
from haystack.constants import ITERATOR_LOAD_PER_QUERY
from haystack.query import SearchQuerySet
from haystack.utils import get_model_ct
from rest_framework import filters
from whoosh.qparser import QueryParser
from whoosh.query import Term, And

from .models.object_permission import get_objects_for_user, get_anonymous_user

Expand Down Expand Up @@ -108,10 +111,26 @@ def filter_queryset(self, request, queryset, view):
if not search_backend.setup_complete:
search_backend.setup()
searcher = search_backend.index.searcher()
query = QueryParser('text', search_backend.index.schema).parse(
# Parse the user's query
user_query = QueryParser('text', search_backend.index.schema).parse(
request.query_params['q'])
# Construct a query to restrict the search to the appropriate model
filter_query = Term(DJANGO_CT, get_model_ct(queryset.model))
# Does the search index for this model have a field that allows
# filtering by permissions?
haystack_index = haystack.connections[
'default'].get_unified_index().get_index(queryset.model)
if hasattr(haystack_index, 'users_granted_permission'):
# Also restrict the search to records that the user can access
filter_query &= Term(
'users_granted_permission', request.user.username)
results = searcher.search(
query, scored=False, sortedby=None, limit=None)
user_query,
filter=filter_query,
scored=False,
sortedby=None,
limit=None
)
pk_type = type(queryset_pks[0])
results_pks = {
# Coerce each `django_id` from unicode to the appropriate type,
Expand Down
13 changes: 13 additions & 0 deletions kpi/search_indexes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import re
from haystack import indexes
from taggit.models import Tag

from .models import Asset, Collection

class FieldPreparersMixin:
Expand All @@ -10,6 +11,7 @@ class FieldPreparersMixin:
We'll find commas (and spaces, to mimic Gmail) and replace them with dashes.
'''
COMMA_SPACE_RE = re.compile('[, ]')

def _escape_comma_space(self, string, repl='-'):
    '''
    Return `string` with every match of the class-level `COMMA_SPACE_RE`
    pattern (a comma or a space) replaced by `repl`, a dash by default.
    '''
    pattern = self.COMMA_SPACE_RE
    return pattern.sub(repl, string)

Expand All @@ -18,28 +20,37 @@ def prepare_tag(self, obj):
self._escape_comma_space(t.name)
for t in obj.tags.all()
]

def prepare_name__exact(self, obj):
    '''
    Return `obj.name` with commas and spaces escaped by
    `_escape_comma_space()`.
    '''
    escaped_name = self._escape_comma_space(obj.name)
    return escaped_name

def prepare_asset_type(self, obj):
    '''
    Return `obj.asset_type` with commas and spaces escaped by
    `_escape_comma_space()`.
    '''
    escaped_type = self._escape_comma_space(obj.asset_type)
    return escaped_type

def prepare_owner__username__exact(self, obj):
    '''
    Return the owner's username with commas and spaces escaped, or `None`
    when `obj.owner` is falsy.
    '''
    if not obj.owner:
        return None
    return self._escape_comma_space(obj.owner.username)

def prepare_parent__name__exact(self, obj):
    '''
    Return the parent's name with commas and spaces escaped, or `None`
    when `obj.parent` is falsy.
    '''
    if not obj.parent:
        return None
    return self._escape_comma_space(obj.parent.name)

def prepare_parent__uid(self, obj):
    '''
    Return the parent's UID, or `None` when `obj.parent` is falsy.

    This trivial method is needed because
    MultiValueField(model_attr='parent__uid') ends up giving each character
    in the UID its own entry in the lexicon.
    '''
    return obj.parent.uid if obj.parent else None

def prepare_ancestor__uid(self, obj):
    '''
    Return a list with the `uid` of each item yielded by
    `obj.get_ancestors_or_none()`, or `None` when that call returns
    something falsy.
    '''
    lineage = obj.get_ancestors_or_none()
    if not lineage:
        return None
    uids = []
    for ancestor in lineage:
        uids.append(ancestor.uid)
    return uids

def prepare_users_granted_permission(self, obj):
    '''
    Return a list with the `username` of each user yielded by
    `obj.get_users_with_perms()`.
    '''
    usernames = []
    for user in obj.get_users_with_perms():
        usernames.append(user.username)
    return usernames


class AssetIndex(indexes.SearchIndex, indexes.Indexable, FieldPreparersMixin):
# Haystack usually doesn't deal well with double underscores in field names
Expand All @@ -65,6 +76,7 @@ class AssetIndex(indexes.SearchIndex, indexes.Indexable, FieldPreparersMixin):
has_deployment = indexes.MultiValueField()
deployment__identifier = indexes.MultiValueField()
deployment__active = indexes.MultiValueField()
users_granted_permission = indexes.MultiValueField()

def prepare_has_deployment(self, obj):
    '''
    Return `obj.has_deployment` converted to a string and lowercased
    (for a boolean value, 'true' or 'false').
    '''
    flag_text = str(obj.has_deployment)
    return flag_text.lower()
Expand Down Expand Up @@ -95,6 +107,7 @@ class CollectionIndex(indexes.SearchIndex, indexes.Indexable, FieldPreparersMixi
owner__username__exact = indexes.MultiValueField()
parent__name__exact = indexes.MultiValueField()
parent__uid = indexes.MultiValueField()
users_granted_permission = indexes.MultiValueField()
def get_model(self):
    '''Return the model class this index covers: `Collection`.'''
    indexed_model = Collection
    return indexed_model

Expand Down

0 comments on commit 1cae3d6

Please sign in to comment.