Permalink
Browse files

[Bug 894686] Step 2 - Search Documents by locale.

  • Loading branch information...
1 parent 1212c97 commit 0040e6b2db03c9d4e894cda13f10cd27537414b5 @mythmon committed Jul 25, 2013
@@ -52,7 +52,8 @@
from kitsune.questions.models import (
Question, Answer, QuestionVote, AnswerVote, QuestionMappingType)
from kitsune.questions.question_config import products
-from kitsune.search.es_utils import ES_EXCEPTIONS, Sphilastic, F
+from kitsune.search.es_utils import (ES_EXCEPTIONS, Sphilastic, F,
+ es_query_with_analyzer)
from kitsune.search.utils import locale_or_default, clean_excerpt
from kitsune.sumo.helpers import urlparams
from kitsune.sumo.urlresolvers import reverse
@@ -1406,6 +1407,7 @@ def _search_suggestions(request, text, locale, product_slugs):
for field in DocumentMappingType.get_query_fields())
query.update(dict(('%s__text_phrase' % field, text)
for field in DocumentMappingType.get_query_fields()))
+ query = es_query_with_analyzer(query, locale)
filter = F()
filter |= F(document_locale=locale)
filter |= F(document_locale=settings.WIKI_DEFAULT_LANGUAGE)
@@ -7,6 +7,7 @@
from django.db import reset_queries
import requests
+from elasticutils import S as UntypedS
from elasticutils.contrib.django import S, F, get_es, ES_EXCEPTIONS # noqa
from pyelasticsearch.exceptions import ElasticHttpNotFoundError
@@ -45,7 +46,37 @@ class UnindexMeBro(Exception):
pass
-class Sphilastic(S):
+class AnalyzerMixin(object):
+
+ def _with_analyzer(self, key, val, action):
+ """Do a normal kind of query, with an analyzer added.
+
+ :arg key: is the field being searched
+ :arg val: is a two-tuple of the text to query for and the name of
+ the analyzer to use.
+ :arg action: is the type of query being performed, like text or
+ text_phrase
+ """
+ query, analyzer = val
+ return {
+ action: {
+ key: {
+ 'query': query,
+ 'analyzer': analyzer,
+ }
+ }
+ }
+
+ def process_query_text_phrase_analyzer(self, key, val, action):
+ """A text phrase query that includes an analyzer."""
+ return self._with_analyzer(key, val, 'text_phrase')
+
+ def process_query_text_analyzer(self, key, val, action):
+ """A text query that includes an analyzer."""
+ return self._with_analyzer(key, val, 'text')
+
+
+class Sphilastic(S, AnalyzerMixin):
"""Shim around elasticutils.contrib.django.S.
Implements some Kitsune-specific behavior to make our lives
@@ -79,6 +110,15 @@ def process_query_mlt(self, key, val, action):
}
+class AnalyzerS(UntypedS, AnalyzerMixin):
+ """This is to give the search view support for setting the analyzer.
+
+ This differs from Sphilastic in that this is a plain ES S object,
+ not based on Django.
+ """
+ pass
+
+
def get_mappings():
mappings = {}
@@ -691,3 +731,24 @@ def es_analyzer_for_locale(locale, fallback="standard"):
analyzer = fallback
return analyzer
+
+
+def es_query_with_analyzer(query, locale):
+ """Transform a query dict to use _analyzer actions for the right fields."""
+ analyzer = es_analyzer_for_locale(locale)
+ new_query = {}
+
+ # Import locally to avoid circular import
+ from kitsune.search.models import get_mapping_types
+ localized_fields = []
+ for mt in get_mapping_types():
+ localized_fields.extend(mt.get_localized_fields())
+
+ for k, v in query.items():
+ field, action = k.split('__')
+ if field in localized_fields:
+ new_query[k + '_analyzer'] = (v, analyzer)
+ else:
+ new_query[k] = v
+
+ return new_query
@@ -106,6 +106,10 @@ def get_query_fields(cls):
raise NotImplementedError
@classmethod
+ def get_localized_fields(cls):
+ return []
+
+ @classmethod
def get_indexable(cls):
# Some models have a gazillion instances. So we want to go
# through them one at a time in a way that doesn't pull all
@@ -1149,6 +1149,24 @@ def test_analyzer_choices(self):
locale = doc['locale']
eq_(doc['_analyzer'], self.locale_data[locale]['analyzer'])
+ def test_query_analyzer_upgrader(self):
+ analyzer = 'snowball-english'
+ before = {
+ 'document_title__text': 'foo',
+ 'document_locale__text': 'bar',
+ 'document_title__text_phrase': 'baz',
+ 'document_locale__text_phrase': 'qux'
+ }
+ expected = {
+ 'document_title__text_analyzer': ('foo', analyzer),
+ 'document_locale__text': 'bar',
+ 'document_title__text_phrase_analyzer': ('baz', analyzer),
+ 'document_locale__text_phrase': 'qux',
+ }
+ actual = es_utils.es_query_with_analyzer(before, 'en-US')
+ eq_(actual, expected)
+
+
def _check_locale_tokenization(self, locale, expected_tokens, p_tag=True):
"""
Check that a given locale's document was tokenized correctly.
@@ -14,7 +14,6 @@
import bleach
import jinja2
-from elasticutils import S as UntypedS
from elasticutils.utils import format_explanation
from mobility.decorators import mobile_template
from statsd import statsd
@@ -28,7 +27,7 @@
from kitsune.search.utils import locale_or_default, clean_excerpt, ComposedList
from kitsune.search import es_utils
from kitsune.search.forms import SearchForm
-from kitsune.search.es_utils import ES_EXCEPTIONS, Sphilastic, F
+from kitsune.search.es_utils import ES_EXCEPTIONS, F, AnalyzerS
from kitsune.sumo.utils import paginate, smart_int
from kitsune.wiki.facets import documents_for
from kitsune.wiki.models import Document, DocumentMappingType
@@ -130,8 +129,8 @@ def search(request, template=None):
# We use an untyped (non-Django) S here because we want to search across
# multiple doctypes.
- searcher = (UntypedS().es(urls=settings.ES_URLS)
- .indexes(es_utils.READ_INDEX))
+ searcher = (AnalyzerS().es(urls=settings.ES_URLS)
+ .indexes(es_utils.READ_INDEX))
wiki_f = F(model='wiki_document')
question_f = F(model='questions_question')
@@ -350,14 +349,16 @@ def search(request, template=None):
if cleaned_q:
query_fields = chain(*[cls.get_query_fields()
for cls in get_mapping_types()])
-
query = {}
# Create text and text_phrase queries for every field
# we want to search.
for field in query_fields:
for query_type in ['text', 'text_phrase']:
query['%s__%s' % (field, query_type)] = cleaned_q
+ # Transform the query to use locale aware analyzers.
+ query = es_utils.es_query_with_analyzer(query, language)
+
searcher = searcher.query(should=True, **query)
num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS)
@@ -525,6 +526,9 @@ def suggestions(request):
try:
query = dict(('%s__text' % field, term)
for field in DocumentMappingType.get_query_fields())
+ # Upgrade the query to an analyzer-aware one.
+ query = es_utils.es_query_with_analyzer(query, locale)
+
wiki_s = (DocumentMappingType.search()
.filter(document_is_archived=False)
.filter(document_locale=locale)
View
@@ -585,10 +585,10 @@ def JINJA_CONFIG():
# Connection information for Elastic
ES_URLS = ['http://127.0.0.1:9200']
# Indexes for reading
-ES_INDEXES = {'default': 'sumo-20130701'}
+ES_INDEXES = {'default': 'sumo-20130723'}
# Indexes for indexing--set this to ES_INDEXES if you want to read from
# and write to the same index.
-ES_WRITE_INDEXES = {'default': 'sumo-20130723'}
+ES_WRITE_INDEXES = ES_INDEXES
# This is prepended to index names to get the final read/write index
# names used by kitsune. This is so that you can have multiple
# environments pointed at the same ElasticSearch cluster and not have
@@ -675,6 +675,15 @@ def get_query_fields(cls):
'document_keywords']
@classmethod
+ def get_localized_fields(cls):
+ # This is the same list as `get_query_fields`, but it doesn't
+ # have to be, which is why it is typed twice.
+ return ['document_title',
+ 'document_content',
+ 'document_summary',
+ 'document_keywords']
+
+ @classmethod
def get_mapping(cls):
return {
'properties': {

0 comments on commit 0040e6b

Please sign in to comment.