[bug 687933] Port search view to oedipus.

commit 26302fe91f842bfd7c742c369de74b13db3afbad (2 parents: ddd2e86 + 63b4009)
Authored by Erik Rose (erikrose)
apps/forums/models.py (21 lines changed)
@@ -13,6 +13,8 @@
from sumo.models import ModelBase
from search.utils import crc32
+from search import S
+
def _last_post_from(posts, exclude_post=None):
"""Return the most recent post in the given set, excluding the given post.
@@ -103,12 +105,6 @@ class Thread(NotificationsMixin, ModelBase):
class Meta:
ordering = ['-is_sticky', '-last_post__created']
- class SphinxMeta(object):
- index = 'discussion_forums'
- filter_mapping = {
- 'title': crc32,
- 'content': crc32}
-
def __setattr__(self, attr, val):
"""Notice when the forum field changes.
@@ -190,6 +186,10 @@ class Post(ActionMixin, ModelBase):
class Meta:
ordering = ['created']
+ class SphinxMeta(object):
+ index = 'discussion_forums'
+ filter_mapping = {'author_ord': crc32}
+
def __unicode__(self):
return self.content[:50]
@@ -251,3 +251,12 @@ def get_absolute_url(self):
@property
def content_parsed(self):
return wiki_to_html(self.content)
+
+
+# The index is on Post, but with the Thread.title for the Thread
+# related to the Post. We base the S off of Post because we need
+# to excerpt content.
+discussion_search = (
+ S(Post).weight(title=2, content=1)
+ .group_by('thread_id', '-@group')
+ .order_by('created'))
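For context, a minimal sketch of how the new discussion_search S might be used, mirroring the calls this commit makes in the tests and in search/views.py. The author name and query string are made up; author_ord is hashed through crc32 by SphinxMeta.filter_mapping, so callers pass the plain username:

from forums.models import Thread, discussion_search
from search.utils import clean_excerpt

# Each chained call returns a new S; the module-level default stays untouched.
s = (discussion_search.highlight('content')
                      .filter(author_ord='admin')    # hypothetical author
                      .query('crash')                # hypothetical query
                      .values_dict('id', 'thread_id', 'content'))

for hit in s:
    thread = Thread.objects.get(pk=hit['thread_id'])
    print thread.title, clean_excerpt(s.excerpt(hit)[0])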
apps/questions/models.py (16 lines changed)
@@ -68,7 +68,9 @@ class Meta:
class SphinxMeta(object):
index = 'questions'
filter_mapping = {
- 'tag': crc32}
+ 'tag': crc32,
+ 'question_creator': crc32,
+ 'answer_creator': crc32}
id_field = 'question_id'
def __unicode__(self):
@@ -495,12 +497,12 @@ def _has_beta(version, dev_releases):
def _content_parsed(obj):
- cache_key = obj.html_cache_key % obj.id
- html = cache.get(cache_key)
- if html is None:
- html = wiki_to_html(obj.content)
- cache.add(cache_key, html)
- return html
+ cache_key = obj.html_cache_key % obj.id
+ html = cache.get(cache_key)
+ if html is None:
+ html = wiki_to_html(obj.content)
+ cache.add(cache_key, html)
+ return html
question_search = (
apps/search/__init__.py (20 lines changed)
@@ -5,9 +5,6 @@
import oedipus
from tower import ugettext_lazy as _lazy
-from search.sphinxapi import (SPH_SORT_ATTR_DESC, SPH_SORT_ATTR_ASC,
- SPH_SORT_EXTENDED, SPH_GROUPBY_ATTR)
-
WHERE_WIKI = 1
WHERE_SUPPORT = 2
@@ -25,10 +22,10 @@
)
GROUPSORT = (
- '@relevance DESC, age ASC', # default
- 'updated DESC',
- 'created DESC',
- 'replies DESC',
+ ('-@relevance', 'age'), # default
+ '-updated',
+ '-created',
+ '-replies',
)
# For discussion forums
@@ -66,11 +63,10 @@
)
SORT_QUESTIONS = (
- #: (mode, clause)
- (SPH_SORT_EXTENDED, '@relevance DESC, age ASC'), # default
- (SPH_SORT_ATTR_DESC, 'updated'),
- (SPH_SORT_ATTR_DESC, 'created'),
- (SPH_SORT_ATTR_DESC, 'replies'),
+ ('-@relevance', 'age'), # default
+ ('-updated',),
+ ('-created',),
+ ('-replies',),
)
SORTBY_QUESTIONS = (
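The sort constants drop the raw SPH_SORT_* modes and extended-sort strings in favor of arguments that feed oedipus's order_by() and group_by() directly. A short sketch of how views.py consumes them after this change; the sortby index is a hypothetical user choice:

import search as constants
from questions.models import question_search
from forums.models import discussion_search

sortby = 1  # hypothetical: sort by most recently updated

# Questions: each SORT_QUESTIONS entry is a tuple of order_by() arguments.
question_s = question_search.order_by(*constants.SORT_QUESTIONS[sortby])

# Discussion posts: the same index picks a GROUPSORT clause; grouping stays
# on thread_id, matching forums.models.discussion_search.
discussion_s = discussion_search.group_by('thread_id',
                                          constants.GROUPSORT[sortby])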
apps/search/clients.py (174 lines changed, file deleted)
@@ -1,174 +0,0 @@
-import logging
-import os
-import re
-import socket
-
-from django.conf import settings
-from django.utils.encoding import smart_unicode
-
-import bleach
-
-from search import sphinxapi
-
-
-log = logging.getLogger('k.search')
-
-
-class SearchError(Exception):
- """An error occurred executing a search."""
-
-
-class SearchClient(object):
- """
- Base-class for search clients
- """
-
- match_mode = sphinxapi.SPH_MATCH_EXTENDED2
- rank_mode = sphinxapi.SPH_RANK_PROXIMITY_BM25
- sort_mode = (sphinxapi.SPH_SORT_RELEVANCE, '')
-
- def __init__(self):
- self.sphinx = sphinxapi.SphinxClient()
- if os.environ.get('DJANGO_ENVIRONMENT') == 'test':
- self.sphinx.SetServer(settings.SPHINX_HOST,
- settings.TEST_SPHINX_PORT)
- else:
- self.sphinx.SetServer(settings.SPHINX_HOST, settings.SPHINX_PORT)
-
- self.sphinx.SetMatchMode(self.match_mode)
- self.sphinx.SetRankingMode(self.rank_mode)
- self.sphinx.SetSortMode(*self.sort_mode)
-
- def _prepare_filters(self, filters=None):
- """Process filters and filter ranges."""
- sc = self.sphinx
- sc.ResetFilters()
- if filters is None:
- filters = []
-
- for f in filters:
- if f.get('exclude') and not f.get('value'):
- # Sphinx doesn't like excluding nothing: excludes everything.
- continue
- if f.get('range', False):
- sc.SetFilterRange(f['filter'], f['min'],
- f['max'], f.get('exclude', False))
- else:
- sc.SetFilter(f['filter'], f['value'],
- f.get('exclude', False))
-
- def _prepare(self):
- """Override to twiddle `self.sphinx` before the query gets sent."""
-
- def _sanitize_query(self, query):
- """Strip control characters that cause problems."""
- query = re.sub(r'(?<=\S)\-', '\-', query)
- return query.replace('^', '').replace('$', '')
-
- def _query_sphinx(self, query=''):
- """
- Pass the query to the SphinxClient() and return the results.
-
- Catches common exceptions raised by Sphinx.
- """
-
- query = self._sanitize_query(query)
-
- try:
- result = self.sphinx.Query(query, self.index)
- except socket.timeout:
- log.error('Query has timed out!')
- raise SearchError('Query has timed out!')
- except socket.error, msg:
- log.error('Query socket error: %s' % msg)
- raise SearchError('Could not execute your search!')
- except Exception, e:
- log.error('Sphinx threw an unknown exception: %s' % e)
- raise SearchError('Sphinx threw an unknown exception!')
-
- if result:
- return result['matches']
- else:
- return []
-
- def query(self, query, filters=None, offset=0,
- limit=settings.SEARCH_MAX_RESULTS):
- """Query the search index."""
- self._prepare_filters(filters)
-
- self.sphinx.SetFieldWeights(self.weights)
- self.sphinx.SetLimits(offset, limit)
-
- self._prepare()
- return self._query_sphinx(query)
-
- def excerpt(self, result, query):
- """
- Given document content and a search query (both strings), uses
- Sphinx to build an excerpt, highlighting the keywords from the
- query.
-
- Length of the final excerpt is roughly determined by
- SEARCH_SUMMARY_LENGTH in settings.py.
- """
- if not isinstance(result, basestring):
- return ''
- documents = [result]
-
- try:
- # build excerpts that are longer and truncate
- # see multiplier constant definition for details
- excerpt = self.sphinx.BuildExcerpts(
- documents, self.index, query,
- {'limit': settings.SEARCH_SUMMARY_LENGTH})[0]
- except socket.error:
- log.error('Socket error building excerpt!')
- excerpt = ''
- except socket.timeout:
- log.error('Building excerpt timed out!')
- excerpt = ''
-
- return bleach.clean(smart_unicode(excerpt))
-
- def set_sort_mode(self, mode, clause=''):
- self.sphinx.SetSortMode(mode, clause)
-
-
-class QuestionsClient(SearchClient):
- index = 'questions'
- weights = {'title': 4, 'question_content': 3, 'answer_content': 3}
- groupsort = '@group desc'
-
- def _prepare(self):
- """Prepare to group the answers together."""
- super(QuestionsClient, self)._prepare()
- self.sphinx.SetGroupBy('question_id', sphinxapi.SPH_GROUPBY_ATTR,
- self.groupsort)
-
-
-class WikiClient(SearchClient):
- """
- Search the knowledge base
- """
- index = 'wiki_pages'
- weights = {'title': 6, 'content': 1, 'keywords': 4, 'summary': 2}
-
-
-class DiscussionClient(SearchClient):
- """
- Search the discussion forums.
- """
- index = 'discussion_forums'
- weights = {'title': 2, 'content': 1}
- groupsort = '@group desc'
- sort_mode = (sphinxapi.SPH_SORT_ATTR_ASC, 'created')
-
- def _prepare(self):
- """Group posts together, and ensure thread['attrs']['updated'] is the
- last post's updated date.
-
- """
- super(DiscussionClient, self)._prepare()
- self.sphinx.SetGroupBy('thread_id', sphinxapi.SPH_GROUPBY_ATTR,
- self.groupsort)
- self.sphinx.SetSortMode(*self.sort_mode)
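The deleted SearchClient hierarchy is replaced by the module-level S objects (wiki_search, question_search, discussion_search): indexes and filter mappings now live on each model's SphinxMeta, and field weights on the S definitions. Roughly, an old imperative call maps onto the chaining API as in this sketch; the query and category value are illustrative:

# Before this commit:
#   wc = WikiClient()
#   results = wc.query('audio', ({'filter': 'category', 'value': [10]},))
#
# After, with the S defined in wiki/models.py:
from wiki.models import wiki_search

results = list(wiki_search.filter(category__in=[10]).query('audio'))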
apps/search/tests/test_search.py (205 lines changed)
@@ -5,7 +5,6 @@
import shutil
import time
import json
-import socket
from django.conf import settings
from django.contrib.sites.models import Site
@@ -15,17 +14,17 @@
import jingo
import mock
from nose import SkipTest
-from nose.tools import assert_raises, eq_
+from nose.tools import eq_
from pyquery import PyQuery as pq
-from forums.models import Post
+from forums.models import Thread, discussion_search
+from questions.models import question_search
import search as constants
-from search.clients import (WikiClient, QuestionsClient,
- DiscussionClient, SearchError)
-from search.utils import start_sphinx, stop_sphinx, reindex, crc32
+from search.utils import (start_sphinx, stop_sphinx, reindex,
+ clean_excerpt)
from sumo.tests import LocalizingClient, TestCase
from sumo.urlresolvers import reverse
-from wiki.models import Document
+from wiki.models import wiki_search
def render(s, context):
@@ -67,15 +66,6 @@ def tearDownClass(cls):
super(SphinxTestCase, cls).tearDownClass()
-def test_sphinx_down():
- """
- Tests that the client times out when Sphinx is down.
- """
- wc = WikiClient()
- wc.sphinx.SetServer('localhost', 65535)
- assert_raises(SearchError, wc.query, 'test')
-
-
# TODO(jsocol):
# * Add tests for all Questions filters.
# * Replace magic numbers with the defined constants.
@@ -84,8 +74,7 @@ class SearchTest(SphinxTestCase):
client_class = LocalizingClient
def test_indexer(self):
- wc = WikiClient()
- results = wc.query('audio')
+ results = wiki_search.query('audio')
eq_(2, len(results))
def test_content(self):
@@ -130,21 +119,24 @@ def test_search_metrics(self):
eq_('0', q['r'])
def test_category(self):
- wc = WikiClient()
- results = wc.query('', ({'filter': 'category', 'value': [10]},))
+ results = wiki_search.filter(category__in=[10])
eq_(5, len(results))
- results = wc.query('', ({'filter': 'category', 'value': [30]},))
+ results = wiki_search.filter(category__in=[30])
eq_(1, len(results))
def test_category_exclude_nothing(self):
"""Excluding no categories should return results."""
- clients = ((WikiClient(), 'category'),
- (QuestionsClient(), 'replies'),
- (DiscussionClient(), 'author_ord'))
- for client, filter in clients:
- results = client.query('', ({'filter': filter, 'exclude': True,
- 'value': []},))
- self.assertNotEquals(0, len(results))
+ # Note: We keep the query('') here to force a new S and thus
+ # not inadvertently test with an S that's not in an original
+ # state.
+ results = wiki_search.query('')
+ self.assertNotEquals(0, len(results))
+
+ results = question_search.query('')
+ self.assertNotEquals(0, len(results))
+
+ results = discussion_search.query('')
+ self.assertNotEquals(0, len(results))
def test_category_exclude(self):
q = {'q': 'audio', 'format': 'json', 'w': 1}
@@ -162,36 +154,32 @@ def test_category_invalid(self):
def test_no_filter(self):
"""Test searching with no filters."""
- wc = WikiClient()
-
- results = wc.query('')
+ # Note: We keep the query('') here to force a new S and thus
+ # not inadvertently test with an S that's not in an original
+ # state.
+ results = list(wiki_search.query(''))
eq_(6, len(results))
def test_range_filter(self):
"""Test filtering on a range."""
- wc = WikiClient()
- filter_ = ({'filter': 'updated',
- 'max': 1285765791,
- 'min': 1284664176,
- 'range': True},)
- results = wc.query('', filter_)
+ results = wiki_search.filter(updated__gte=1284664176,
+ updated__lte=1285765791)
eq_(2, len(results))
def test_sort_mode(self):
"""Test set_sort_mode()."""
# Initialize client and attrs.
- qc = QuestionsClient()
- test_for = ('updated', 'created', 'replies')
+ test_for = ('updated', 'created', 'replies')
i = 0
for sort_mode in constants.SORT_QUESTIONS[1:]: # Skip default sorting.
- qc.set_sort_mode(sort_mode[0], sort_mode[1])
- results = qc.query('')
+ results = list(question_search.order_by(*sort_mode)
+ .values_dict(test_for[i]))
eq_(4, len(results))
# Compare first and second.
- x = results[0]['attrs'][test_for[i]]
- y = results[1]['attrs'][test_for[i]]
+ x = results[0][test_for[i]]
+ y = results[1][test_for[i]]
assert x > y, '%s !> %s' % (x, y)
i += 1
@@ -345,42 +333,33 @@ def test_products_inherit(self):
def test_unicode_excerpt(self):
"""Unicode characters in the excerpt should not be a problem."""
- wc = WikiClient()
- page = Document.objects.get(pk=2)
+ ws = (wiki_search.highlight('html')
+ .query(u'\u30c1')
+ .values_dict('html'))
+ results = list(ws)
try:
- excerpt = wc.excerpt(page.html, u'\u3068')
+ excerpt = ws.excerpt(results[0])
render('{{ c }}', {'c': excerpt})
except UnicodeDecodeError:
self.fail('Raised UnicodeDecodeError.')
def test_utf8_excerpt(self):
"""Characters should stay in UTF-8."""
- wc = WikiClient()
- page = Document.objects.get(pk=4)
q = u'fa\xe7on'
- excerpt = wc.excerpt(page.html, q)
+ ws = (wiki_search.highlight('html')
+ .query(u'fa\xe7on')
+ .values_dict('html'))
+
+ results = list(ws)
+ # page = Document.objects.get(pk=4)
+ excerpt = clean_excerpt(ws.excerpt(results[0])[0])
assert q in excerpt, u'%s not in %s' % (q, excerpt)
def test_clean_excerpt(self):
- """SearchClient.excerpt() should not allow disallowed HTML through."""
- wc = WikiClient() # Index strips HTML
- qc = QuestionsClient() # Index does not strip HTML
- input = 'test <div>the start of something</div>'
- output_strip = '<b>test</b> the start of something'
- output_nostrip = ('<b>test</b> &lt;div&gt;the start of '
- 'something&lt;/div&gt;')
- eq_(output_strip, wc.excerpt(input, 'test'))
- eq_(output_nostrip, qc.excerpt(input, 'test'))
-
- def test_empty_content_excerpt(self):
- """SearchClient.excerpt() returns empty string for empty content."""
- wc = WikiClient()
- eq_('', wc.excerpt('', 'test'))
-
- def test_none_content_excerpt(self):
- """SearchClient.excerpt() returns empty string for None type."""
- wc = WikiClient()
- eq_('', wc.excerpt(None, 'test'))
+ """clean_excerpt() should not allow disallowed HTML through."""
+ in_ = '<b>test</b> <div>the start of something</div>'
+ out_ = '<b>test</b> &lt;div&gt;the start of something&lt;/div&gt;'
+ eq_(out_, clean_excerpt(in_))
def test_meta_tags(self):
url_ = reverse('search')
@@ -392,12 +371,12 @@ def test_meta_tags(self):
def test_discussion_sanity(self):
"""Sanity check for discussion forums search client."""
- dc = DiscussionClient()
- filters_f = [{'filter': 'author_ord', 'value': (crc32('admin'),)}]
- results = dc.query(u'', filters_f)
+ dis_s = (discussion_search.highlight('content')
+ .filter(author_ord='admin')
+ .query('post').values_dict('id', 'content'))
+ results = list(dis_s)
eq_(1, len(results))
- post = Post.objects.get(pk=results[0]['id'])
- eq_(u'yet another <b>post</b>', dc.excerpt(post.content, u'post'))
+ eq_(u'yet another <b>post</b>', dis_s.excerpt(results[0])[0])
def test_discussion_filter_author(self):
"""Filter by author in discussion forums."""
@@ -490,82 +469,85 @@ def test_discussion_filter_updated(self):
def test_discussion_sort_mode(self):
"""Test set groupsort."""
# Initialize client and attrs.
- dc = DiscussionClient()
- test_for = ('updated', 'created', 'replies')
+ ds = discussion_search
+ test_for = ('updated', 'created')
i = 0
- for groupsort in constants.GROUPSORT[1:]: # Skip default sorting.
- dc.groupsort = groupsort
- results = dc.query('')
+ # This tests -updated and -created and skips the default
+ # sorting and -replies.
+ for groupsort in constants.GROUPSORT[1:-1]:
+ results = list(ds.group_by('thread_id', groupsort))
eq_(5, len(results))
# Compare first and last.
- assert (results[0]['attrs'][test_for[i]] >
- results[-1]['attrs'][test_for[i]])
+ assert (getattr(results[0], test_for[i]) >
+ getattr(results[-1], test_for[i]))
i += 1
+ # We have to do -replies group sort separate because replies
+ # is an attribute of Thread and not Post.
+ results = list(ds.group_by('thread_id', '-replies'))
+ eq_(5, len(results))
+ t0 = Thread.objects.get(pk=results[0].thread_id)
+ tn1 = Thread.objects.get(pk=results[-1].thread_id)
+ assert (t0.replies > tn1.replies)
+
def test_wiki_index_keywords(self):
"""The keywords field of a revision is indexed."""
- wc = WikiClient()
- results = wc.query('foobar')
+ results = list(wiki_search.query('foobar'))
eq_(1, len(results))
- eq_(3, results[0]['id'])
+ eq_(3, results[0].id)
def test_wiki_index_summary(self):
"""The summary field of a revision is indexed."""
- wc = WikiClient()
- results = wc.query('whatever')
+ results = list(wiki_search.query('whatever'))
eq_(1, len(results))
- eq_(3, results[0]['id'])
+ eq_(3, results[0].id)
def test_wiki_index_content(self):
"""Obviously the content should be indexed."""
- wc = WikiClient()
- results = wc.query('video')
+ results = list(wiki_search.query('video'))
eq_(1, len(results))
- eq_(1, results[0]['id'])
+ eq_(1, results[0].id)
def test_wiki_index_strip_html(self):
"""HTML should be stripped, not indexed."""
- wc = WikiClient()
- results = wc.query('strong')
+ results = list(wiki_search.query('strong'))
eq_(0, len(results))
def test_ngram_chars(self):
"""Ideographs are handled correctly."""
- wc = WikiClient()
- results = wc.query(u'\u30c1')
+ results = list(wiki_search.query(u'\u30c1'))
eq_(1, len(results))
- eq_(2, results[0]['id'])
+ eq_(2, results[0].id)
def test_no_syntax_error(self):
"""Test that special chars cannot cause a syntax error."""
- wc = WikiClient()
- results = wc.query('video^$')
+ results = list(wiki_search.query('video^$'))
eq_(1, len(results))
- results = wc.query('video^^^$$$^')
+ results = list(wiki_search.query('video^^^$$$^'))
eq_(1, len(results))
+ results = list(wiki_search.query('google.com/ig'))
+ eq_(0, len(results))
+
def test_clean_hyphens(self):
"""Hyphens in words aren't special characters."""
- wc = WikiClient()
- results = wc.query('marque-page')
+ results = list(wiki_search.query('marque-page'))
eq_(1, len(results))
def test_exclude_words(self):
"""Excluding words with -word works."""
- wc = WikiClient()
- results = wc.query('spanish')
+ results = list(wiki_search.query('spanish'))
eq_(1, len(results))
- results = wc.query('spanish -content')
+ results = list(wiki_search.query('spanish -content'))
eq_(0, len(results))
def test_no_redirects(self):
"""Redirect articles should never appear in search results."""
- wc = WikiClient()
- results = wc.query('ghosts')
+ results = list(wiki_search.query('ghosts'))
eq_(1, len(results))
def test_search_cookie(self):
@@ -614,18 +596,3 @@ def test_archived(self):
response = self.client.get(reverse('search'), qs)
results = json.loads(response.content)['results']
eq_([], results)
-
-
-query = lambda *args, **kwargs: WikiClient().query(*args, **kwargs)
-
-
-@mock.patch('search.clients.WikiClient')
-def test_excerpt_timeout(sphinx_mock):
- def sphinx_error(cls):
- raise cls
-
- sphinx_mock.query.side_effect = lambda *a: sphinx_error(socket.timeout)
- assert_raises(SearchError, query, 'xxx')
-
- sphinx_mock.query.side_effect = lambda *a: sphinx_error(Exception)
- assert_raises(SearchError, query, 'xxx')
apps/search/utils.py (6 lines changed)
@@ -1,6 +1,8 @@
import subprocess
import zlib
+import bleach
+
from django.conf import settings
from sumo_locales import LOCALES
@@ -12,6 +14,10 @@
call = lambda x: subprocess.Popen(x, stdout=subprocess.PIPE).communicate()
+def clean_excerpt(excerpt):
+ return bleach.clean(excerpt)
+
+
def reindex(rotate=False):
"""Reindex sphinx.
apps/search/views.py (257 lines changed)
@@ -8,7 +8,6 @@
from django.contrib.sites.models import Site
from django.db.models import ObjectDoesNotExist
from django.http import HttpResponse, HttpResponseBadRequest
-from django.utils.http import urlencode
from django.utils.http import urlquote
from django.views.decorators.cache import cache_page
@@ -17,15 +16,14 @@
from mobility.decorators import mobile_template
from tower import ugettext as _
-from search.clients import (QuestionsClient, WikiClient,
- DiscussionClient, SearchError)
-from search.utils import crc32, locale_or_default, sphinx_locale
-from forums.models import Thread, Post
-from questions.models import Question, question_search
+from search import SearchError
+from search.utils import locale_or_default, clean_excerpt
+from forums.models import Thread, discussion_search
+from questions.models import question_search
import search as constants
from search.forms import SearchForm
from sumo.utils import paginate, smart_int
-from wiki.models import Document, wiki_search
+from wiki.models import wiki_search
def jsonp_is_valid(func):
@@ -100,7 +98,6 @@ def search(request, template=None):
return search_
cleaned = search_form.cleaned_data
- search_locale = (sphinx_locale(language),)
page = max(smart_int(request.GET.get('page')), 1)
offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE
@@ -112,59 +109,39 @@ def search(request, template=None):
else:
lang_name = ''
+ wiki_s = wiki_search
+ question_s = question_search
+ discussion_s = discussion_search
+
documents = []
- filters_w = []
- filters_q = []
- filters_f = []
# wiki filters
# Category filter
if cleaned['category']:
- filters_w.append({
- 'filter': 'category',
- 'value': cleaned['category'],
- })
+ wiki_s = wiki_s.filter(category__in=cleaned['category'])
if exclude_category:
- filters_w.append({
- 'filter': 'category',
- 'value': exclude_category,
- 'exclude': True,
- })
+ wiki_s = wiki_s.exclude(category__in=exclude_category)
# Locale filter
- filters_w.append({
- 'filter': 'locale',
- 'value': search_locale,
- })
+ wiki_s = wiki_s.filter(locale=language)
# Product filter
products = cleaned['product']
- if products:
- for p in products:
- filters_w.append({
- 'filter': 'tag',
- 'value': (crc32(p),),
- })
+ for p in products:
+ wiki_s = wiki_s.filter(tag=p)
# Tags filter
- tags = [crc32(t.strip()) for t in cleaned['tags'].split()]
- if tags:
- for t in tags:
- filters_w.append({
- 'filter': 'tag',
- 'value': (t,),
- })
+ tags = [t.strip() for t in cleaned['tags'].split()]
+ for t in tags:
+ wiki_s = wiki_s.filter(tag=t)
# Archived bit
if a == '0' and not cleaned['include_archived']:
# Default to NO for basic search:
cleaned['include_archived'] = False
if not cleaned['include_archived']:
- filters_w.append({
- 'filter': 'is_archived',
- 'value': (False,),
- })
+ wiki_s = wiki_s.filter(is_archived=False)
# End of wiki filters
# Support questions specific filters
@@ -176,57 +153,39 @@ def search(request, template=None):
# These filters are ternary, they can be either YES, NO, or OFF
ternary_filters = ('is_locked', 'is_solved', 'has_answers',
- 'has_helpful')
- filters_q.extend(_ternary_filter(filter_name, cleaned[filter_name])
- for filter_name in ternary_filters
- if cleaned[filter_name])
+ 'has_helpful')
+ d = dict((filter_name, _ternary_filter(cleaned[filter_name]))
+ for filter_name in ternary_filters
+ if cleaned[filter_name])
+ if d:
+ question_s = question_s.filter(**d)
if cleaned['asked_by']:
- filters_q.append({
- 'filter': 'question_creator',
- 'value': (crc32(cleaned['asked_by']),),
- })
+ question_s = question_s.filter(
+ question_creator=cleaned['asked_by'])
if cleaned['answered_by']:
- filters_q.append({
- 'filter': 'answer_creator',
- 'value': (crc32(cleaned['answered_by']),),
- })
-
- q_tags = [crc32(t.strip()) for t in cleaned['q_tags'].split()]
- if q_tags:
- for t in q_tags:
- filters_q.append({
- 'filter': 'tag',
- 'value': (t,),
- })
+ question_s = question_s.filter(
+ answer_creator=cleaned['answered_by'])
+
+ q_tags = [t.strip() for t in cleaned['q_tags'].split()]
+ for t in q_tags:
+ question_s = question_s.filter(tag=t)
# Discussion forum specific filters
if cleaned['w'] & constants.WHERE_DISCUSSION:
if cleaned['author']:
- filters_f.append({
- 'filter': 'author_ord',
- 'value': (crc32(cleaned['author']),),
- })
+ discussion_s = discussion_s.filter(author_ord=cleaned['author'])
if cleaned['thread_type']:
if constants.DISCUSSION_STICKY in cleaned['thread_type']:
- filters_f.append({
- 'filter': 'is_sticky',
- 'value': (1,),
- })
+ discussion_s = discussion_s.filter(is_sticky=1)
if constants.DISCUSSION_LOCKED in cleaned['thread_type']:
- filters_f.append({
- 'filter': 'is_locked',
- 'value': (1,),
- })
+ discussion_s = discussion_s.filter(is_locked=1)
if cleaned['forum']:
- filters_f.append({
- 'filter': 'forum_id',
- 'value': cleaned['forum'],
- })
+ discussion_s = discussion_s.filter(forum_id=cleaned['forum'])
# Filters common to support and discussion forums
# Created filter
@@ -237,55 +196,68 @@ def search(request, template=None):
('question_votes', cleaned['num_voted'], cleaned['num_votes']))
for filter_name, filter_option, filter_date in interval_filters:
if filter_option == constants.INTERVAL_BEFORE:
- before = {
- 'range': True,
- 'filter': filter_name,
- 'min': 0,
- 'max': max(filter_date, 0),
- }
+ before = {filter_name + '__gte': 0,
+ filter_name + '__lte': max(filter_date, 0)}
+
if filter_name != 'question_votes':
- filters_f.append(before)
- filters_q.append(before)
+ discussion_s = discussion_s.filter(**before)
+ question_s = question_s.filter(**before)
elif filter_option == constants.INTERVAL_AFTER:
- after = {
- 'range': True,
- 'filter': filter_name,
- 'min': min(filter_date, unix_now),
- 'max': unix_now,
- }
+ after = {filter_name + '__gte': min(filter_date, unix_now),
+ filter_name + '__lte': unix_now}
+
if filter_name != 'question_votes':
- filters_f.append(after)
- filters_q.append(after)
+ discussion_s = discussion_s.filter(**after)
+ question_s = question_s.filter(**after)
sortby = smart_int(request.GET.get('sortby'))
try:
+ max_results = settings.SEARCH_MAX_RESULTS
+ cleaned_q = cleaned['q']
+
if cleaned['w'] & constants.WHERE_WIKI:
- wc = WikiClient() # Wiki SearchClient instance
+ wiki_s = wiki_s.query(cleaned_q)[:max_results]
# Execute the query and append to documents
- documents += wc.query(cleaned['q'], filters_w)
+ documents += [('wiki', (pair[0], pair[1]))
+ for pair in enumerate(wiki_s.object_ids())]
if cleaned['w'] & constants.WHERE_SUPPORT:
- qc = QuestionsClient() # Support question SearchClient instance
-
# Sort results by
try:
- qc.set_sort_mode(constants.SORT_QUESTIONS[sortby][0],
- constants.SORT_QUESTIONS[sortby][1])
+ question_s = question_s.order_by(
+ *constants.SORT_QUESTIONS[sortby])
except IndexError:
pass
- documents += qc.query(cleaned['q'], filters_q)
+ question_s = question_s.highlight(
+ 'content',
+ before_match='<b>',
+ after_match='</b>',
+ limit=settings.SEARCH_SUMMARY_LENGTH)
- if cleaned['w'] & constants.WHERE_DISCUSSION:
- dc = DiscussionClient() # Discussion forums SearchClient instance
+ question_s = question_s.query(cleaned_q)[:max_results]
+ documents += [('question', (pair[0], pair[1]))
+ for pair in enumerate(question_s.object_ids())]
+ if cleaned['w'] & constants.WHERE_DISCUSSION:
# Sort results by
try:
- dc.groupsort = constants.GROUPSORT[sortby]
+ # Note that the first attribute needs to be the same
+ # here and in forums/models.py discussion_search.
+ discussion_s = discussion_s.group_by(
+ 'thread_id', constants.GROUPSORT[sortby])
except IndexError:
pass
- documents += dc.query(cleaned['q'], filters_f)
+ discussion_s = discussion_s.highlight(
+ 'content',
+ before_match='<b>',
+ after_match='</b>',
+ limit=settings.SEARCH_SUMMARY_LENGTH)
+
+ discussion_s = discussion_s.query(cleaned_q)[:max_results]
+ documents += [('discussion', (pair[0], pair[1]))
+ for pair in enumerate(discussion_s.object_ids())]
except SearchError:
if is_json:
@@ -298,52 +270,74 @@ def search(request, template=None):
pages = paginate(request, documents, settings.SEARCH_RESULTS_PER_PAGE)
+ # Build a dict of { type_ -> list of indexes } for the specific
+ # docs that we're going to display on this page. This makes it
+ # easy for us to slice the appropriate search Ss so we're limiting
+ # our db hits to just the items we're showing.
+ documents_dict = {}
+ for doc in documents[offset:offset + settings.SEARCH_RESULTS_PER_PAGE]:
+ documents_dict.setdefault(doc[0], []).append(doc[1][0])
+
+ docs_for_page = []
+ for type_, search_s in [('wiki', wiki_s),
+ ('question', question_s),
+ ('discussion', discussion_s)]:
+ if type_ not in documents_dict:
+ continue
+
+ # documents_dict[type_] is a list of indexes--one for each
+ # object id search result for that type_. We use the values
+ # at the beginning and end of the list for slice boundaries.
+ begin = documents_dict[type_][0]
+ end = documents_dict[type_][-1] + 1
+ docs_for_page += [(type_, doc) for doc in search_s[begin:end]]
+
results = []
- for i in range(offset, offset + settings.SEARCH_RESULTS_PER_PAGE):
+ for i, docinfo in enumerate(docs_for_page):
+ rank = i + offset
+ type_, doc = docinfo
try:
- if documents[i]['attrs'].get('category', False) != False:
- wiki_page = Document.objects.get(pk=documents[i]['id'])
- summary = wiki_page.current_revision.summary
+ if type_ == 'wiki':
+ summary = doc.current_revision.summary
result = {
'search_summary': summary,
- 'url': wiki_page.get_absolute_url(),
- 'title': wiki_page.title,
+ 'url': doc.get_absolute_url(),
+ 'title': doc.title,
'type': 'document',
- 'rank': i,
- 'object': wiki_page,
+ 'rank': rank,
+ 'object': doc,
}
results.append(result)
- elif documents[i]['attrs'].get('question_creator', False) != False:
- question = Question.objects.get(
- pk=documents[i]['attrs']['question_id'])
- excerpt = qc.excerpt(question.content, cleaned['q'])
- summary = jinja2.Markup(excerpt)
+ elif type_ == 'question':
+ summary = jinja2.Markup(
+ clean_excerpt(question_s.excerpt(doc)[0]))
result = {
'search_summary': summary,
- 'url': question.get_absolute_url(),
- 'title': question.title,
+ 'url': doc.get_absolute_url(),
+ 'title': doc.title,
'type': 'question',
- 'rank': i,
- 'object': question,
+ 'rank': rank,
+ 'object': doc,
}
results.append(result)
+
else:
- thread = Thread.objects.get(
- pk=documents[i]['attrs']['thread_id'])
- post = Post.objects.get(pk=documents[i]['id'])
+ # discussion_s is based on Post--not Thread, so we have
+ # to get this manually.
+ thread = Thread.objects.get(pk=doc.thread_id)
- excerpt = dc.excerpt(post.content, cleaned['q'])
- summary = jinja2.Markup(excerpt)
+ summary = jinja2.Markup(
+ clean_excerpt(discussion_s.excerpt(doc)[0]))
result = {
'search_summary': summary,
'url': thread.get_absolute_url(),
'title': thread.title,
'type': 'thread',
- 'rank': i,
+ 'rank': rank,
'object': thread,
}
results.append(result)
@@ -398,7 +392,9 @@ def suggestions(request):
site = Site.objects.get_current()
locale = locale_or_default(request.locale)
results = list(chain(
- wiki_search.filter(locale=locale).query(term)[:5],
+ wiki_search.filter(is_archived=False)
+ .filter(locale=locale)
+ .query(term)[:5],
question_search.filter(has_helpful=True).query(term)[:5]))
# Note: wiki_search no longer filters out archived documents itself
# (see wiki/models.py), so is_archived=False is applied here.
@@ -416,11 +412,10 @@ def plugin(request):
mimetype='application/opensearchdescription+xml')
-def _ternary_filter(filter_name, ternary_value):
+def _ternary_filter(ternary_value):
"""Return a search query given a TERNARY_YES or TERNARY_NO.
Behavior for TERNARY_OFF is undefined.
"""
- return {'filter': filter_name,
- 'value': (ternary_value == constants.TERNARY_YES,)}
+ return ternary_value == constants.TERNARY_YES
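The reworked pagination collects (type_, (index, object_id)) pairs from each S via object_ids(), then slices each S so only the hits on the current page trigger database lookups. A toy illustration of the dict-building step with made-up ids:

# documents as built above, e.g. [('wiki', (0, 12)), ('wiki', (1, 40)), ...]
# where the inner pair is (position within that S, object id).
documents = [('wiki', (0, 12)), ('wiki', (1, 40)),
             ('question', (0, 7)), ('question', (1, 9))]  # hypothetical ids
offset, per_page = 0, 3

documents_dict = {}
for doc in documents[offset:offset + per_page]:
    documents_dict.setdefault(doc[0], []).append(doc[1][0])

# documents_dict == {'wiki': [0, 1], 'question': [0]}
# Each S is then sliced [first_index:last_index + 1], so only the objects
# actually shown on this page are fetched from the database.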
apps/wiki/models.py (9 lines changed)
@@ -250,7 +250,10 @@ class Meta(object):
class SphinxMeta(object):
index = 'wiki_pages'
- filter_mapping = {'locale': crc32, 'tag': crc32}
+ filter_mapping = {
+ 'locale': crc32,
+ 'tag': crc32,
+ }
def _collides(self, attr, value):
"""Return whether there exists a doc in this locale whose `attr` attr
@@ -876,7 +879,5 @@ def points_to_document_view(url, required_locale=None):
# Default search parameters for the wiki:
-wiki_search = (
- S(Document).filter(is_archived=False)
- .weight(title=6, content=1, keywords=4, summary=2))
+wiki_search = S(Document).weight(title=6, content=1, keywords=4, summary=2)
# TODO: We probably have several more default filters to add.
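With the is_archived filter dropped from the default wiki_search, each call site now opts in explicitly, as the suggestions() view does above. A sketch of the pattern; the locale and query are placeholders:

from wiki.models import wiki_search

# The default S no longer excludes archived documents, so chain the filter
# wherever archived pages should be hidden.
live_docs = (wiki_search.filter(is_archived=False)
                        .filter(locale='en-US')
                        .query('firefox')[:5])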
vendor/src/oedipus (submodule commit updated)
@@ -1 +1 @@
-Subproject commit 6b8f0e5f245a51d8cbf252543521db73a312fc40
+Subproject commit 9a3ec4b4f75d99b329e766e8e30e50ca9f569b0f