Browse files

[Bug 894686] Step 2 - Search Documents by locale.

  • Loading branch information...
mythmon committed Jul 25, 2013
1 parent 1212c97 commit 0040e6b2db03c9d4e894cda13f10cd27537414b5
@@ -52,7 +52,8 @@
from kitsune.questions.models import (
Question, Answer, QuestionVote, AnswerVote, QuestionMappingType)
from kitsune.questions.question_config import products
from import ES_EXCEPTIONS, Sphilastic, F
from import (ES_EXCEPTIONS, Sphilastic, F,
from import locale_or_default, clean_excerpt
from kitsune.sumo.helpers import urlparams
from kitsune.sumo.urlresolvers import reverse
@@ -1406,6 +1407,7 @@ def _search_suggestions(request, text, locale, product_slugs):
for field in DocumentMappingType.get_query_fields())
query.update(dict(('%s__text_phrase' % field, text)
for field in DocumentMappingType.get_query_fields()))
query = es_query_with_analyzer(query, locale)
filter = F()
filter |= F(document_locale=locale)
filter |= F(document_locale=settings.WIKI_DEFAULT_LANGUAGE)
@@ -7,6 +7,7 @@
from django.db import reset_queries
import requests
from elasticutils import S as UntypedS
from elasticutils.contrib.django import S, F, get_es, ES_EXCEPTIONS # noqa
from pyelasticsearch.exceptions import ElasticHttpNotFoundError
@@ -45,7 +46,37 @@ class UnindexMeBro(Exception):
class Sphilastic(S):
class AnalyzerMixin(object):
def _with_analyzer(self, key, val, action):
"""Do a normal kind of query, with a analyzer added.
:arg key: is the field being searched
:arg val: is a two-tuple of the text to query for and the name of
the analyzer to use.
:arg action: is the type of query being performed, like text or
query, analyzer = val
return {
action: {
key: {
'query': query,
'analyzer': analyzer,
def process_query_text_phrase_analyzer(self, key, val, action):
"""A text phrase query that includes an analyzer."""
return self._with_analyzer(key, val, 'text_phrase')
def process_query_text_analyzer(self, key, val, action):
"""A text query that includes an analyzer."""
return self._with_analyzer(key, val, 'text')
class Sphilastic(S, AnalyzerMixin):
"""Shim around elasticutils.contrib.django.S.
Implements some Kitsune-specific behavior to make our lives
@@ -79,6 +110,15 @@ def process_query_mlt(self, key, val, action):
class AnalyzerS(UntypedS, AnalyzerMixin):
"""This is to give the search view support for setting the analyzer.
This differs from Sphilastic in that this is a plain ES S object,
not based on Django.
def get_mappings():
mappings = {}
@@ -691,3 +731,24 @@ def es_analyzer_for_locale(locale, fallback="standard"):
analyzer = fallback
return analyzer
def es_query_with_analyzer(query, locale):
"""Transform a query dict to use _analyzer actions for the right fields."""
analyzer = es_analyzer_for_locale(locale)
new_query = {}
# Import locally to avoid circular import
from import get_mapping_types
localized_fields = []
for mt in get_mapping_types():
for k, v in query.items():
field, action = k.split('__')
if field in localized_fields:
new_query[k + '_analyzer'] = (v, analyzer)
new_query[k] = v
return new_query
@@ -105,6 +105,10 @@ def get_query_fields(cls):
"""Return the list of fields for query"""
raise NotImplementedError
def get_localized_fields(cls):
return []
def get_indexable(cls):
# Some models have a gazillion instances. So we want to go
@@ -1149,6 +1149,24 @@ def test_analyzer_choices(self):
locale = doc['locale']
eq_(doc['_analyzer'], self.locale_data[locale]['analyzer'])
def test_query_analyzer_upgrader(self):
analyzer = 'snowball-english'
before = {
'document_title__text': 'foo',
'document_locale__text': 'bar',
'document_title__text_phrase': 'baz',
'document_locale__text_phrase': 'qux'
expected = {
'document_title__text_analyzer': ('foo', analyzer),
'document_locale__text': 'bar',
'document_title__text_phrase_analyzer': ('baz', analyzer),
'document_locale__text_phrase': 'qux',
actual = es_utils.es_query_with_analyzer(before, 'en-US')
eq_(actual, expected)
def _check_locale_tokenization(self, locale, expected_tokens, p_tag=True):
Check that a given locale's document was tokenized correctly.
@@ -14,7 +14,6 @@
import bleach
import jinja2
from elasticutils import S as UntypedS
from elasticutils.utils import format_explanation
from mobility.decorators import mobile_template
from statsd import statsd
@@ -28,7 +27,7 @@
from import locale_or_default, clean_excerpt, ComposedList
from import es_utils
from import SearchForm
from import ES_EXCEPTIONS, Sphilastic, F
from import ES_EXCEPTIONS, F, AnalyzerS
from kitsune.sumo.utils import paginate, smart_int
from import documents_for
from import Document, DocumentMappingType
@@ -130,8 +129,8 @@ def search(request, template=None):
# We use a regular S here because we want to search across
# multiple doctypes.
searcher = (UntypedS().es(urls=settings.ES_URLS)
searcher = (AnalyzerS().es(urls=settings.ES_URLS)
wiki_f = F(model='wiki_document')
question_f = F(model='questions_question')
@@ -350,14 +349,16 @@ def search(request, template=None):
if cleaned_q:
query_fields = chain(*[cls.get_query_fields()
for cls in get_mapping_types()])
query = {}
# Create text and text_phrase queries for every field
# we want to search.
for field in query_fields:
for query_type in ['text', 'text_phrase']:
query['%s__%s' % (field, query_type)] = cleaned_q
# Transform the query to use locale aware analyzers.
query = es_utils.es_query_with_analyzer(query, language)
searcher = searcher.query(should=True, **query)
num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS)
@@ -525,6 +526,9 @@ def suggestions(request):
query = dict(('%s__text' % field, term)
for field in DocumentMappingType.get_query_fields())
# Upgrade the query to an analyzer-aware one.
query = es_utils.es_query_with_analyzer(query, locale)
wiki_s = (
@@ -585,10 +585,10 @@ def JINJA_CONFIG():
# Connection information for Elastic
ES_URLS = ['']
# Indexes for reading
ES_INDEXES = {'default': 'sumo-20130701'}
ES_INDEXES = {'default': 'sumo-20130723'}
# Indexes for indexing--set this to ES_INDEXES if you want to read from
# and write to the same index.
ES_WRITE_INDEXES = {'default': 'sumo-20130723'}
# This is prepended to index names to get the final read/write index
# names used by kitsune. This is so that you can have multiple
# environments pointed at the same ElasticSearch cluster and not have
@@ -674,6 +674,15 @@ def get_query_fields(cls):
def get_localized_fields(cls):
# This is the same list as `get_query_fields`, but it doesn't
# have to be, which is why it is typed twice.
return ['document_title',
def get_mapping(cls):
return {

0 comments on commit 0040e6b

Please sign in to comment.