Skip to content


Browse files Browse the repository at this point in the history
[Bug 894686] Step 2 - Search Documents by locale.
  • Loading branch information
mythmon committed Aug 1, 2013
1 parent 1212c97 commit 0040e6b
Show file tree
Hide file tree
Showing 7 changed files with 107 additions and 9 deletions.
4 changes: 3 additions & 1 deletion kitsune/questions/
Expand Up @@ -52,7 +52,8 @@
from kitsune.questions.models import (
Question, Answer, QuestionVote, AnswerVote, QuestionMappingType)
from kitsune.questions.question_config import products
from import ES_EXCEPTIONS, Sphilastic, F
from import (ES_EXCEPTIONS, Sphilastic, F,
from import locale_or_default, clean_excerpt
from kitsune.sumo.helpers import urlparams
from kitsune.sumo.urlresolvers import reverse
Expand Down Expand Up @@ -1406,6 +1407,7 @@ def _search_suggestions(request, text, locale, product_slugs):
for field in DocumentMappingType.get_query_fields())
query.update(dict(('%s__text_phrase' % field, text)
for field in DocumentMappingType.get_query_fields()))
query = es_query_with_analyzer(query, locale)
filter = F()
filter |= F(document_locale=locale)
filter |= F(document_locale=settings.WIKI_DEFAULT_LANGUAGE)
Expand Down
63 changes: 62 additions & 1 deletion kitsune/search/
Expand Up @@ -7,6 +7,7 @@
from django.db import reset_queries

import requests
from elasticutils import S as UntypedS
from elasticutils.contrib.django import S, F, get_es, ES_EXCEPTIONS # noqa
from pyelasticsearch.exceptions import ElasticHttpNotFoundError

Expand Down Expand Up @@ -45,7 +46,37 @@ class UnindexMeBro(Exception):

class Sphilastic(S):
class AnalyzerMixin(object):
    """Mixin providing query actions that attach an explicit analyzer.

    It adds ``text_analyzer`` and ``text_phrase_analyzer`` handlers whose
    value is a ``(text, analyzer_name)`` two-tuple rather than a bare
    string.

    NOTE(review): the ``process_query_<action>`` method naming presumably
    follows the query-action hook convention of the ``S`` classes this is
    mixed into -- confirm against the elasticutils documentation.
    """

    def _with_analyzer(self, key, val, action):
        """Do a normal kind of query, with an analyzer added.

        :arg key: is the field being searched
        :arg val: is a two-tuple of the text to query for and the name of
            the analyzer to use.
        :arg action: is the type of query being performed, like text or
            text_phrase

        :returns: a dict shaped like
            ``{action: {key: {'query': text, 'analyzer': analyzer}}}``
        :raises ValueError: if ``val`` does not unpack into exactly two
            items (``TypeError`` if it is not iterable at all)
        """
        query, analyzer = val
        return {
            action: {
                key: {
                    'query': query,
                    'analyzer': analyzer,
                },
            },
        }

    def process_query_text_phrase_analyzer(self, key, val, action):
        """A text phrase query that includes an analyzer."""
        # The incoming ``action`` is ignored on purpose: the emitted query
        # is always a plain ``text_phrase`` query, just with an analyzer.
        return self._with_analyzer(key, val, 'text_phrase')

    def process_query_text_analyzer(self, key, val, action):
        """A text query that includes an analyzer."""
        # As above, the emitted query type is fixed to ``text``.
        return self._with_analyzer(key, val, 'text')

class Sphilastic(S, AnalyzerMixin):
"""Shim around elasticutils.contrib.django.S.
Implements some Kitsune-specific behavior to make our lives
Expand Down Expand Up @@ -79,6 +110,15 @@ def process_query_mlt(self, key, val, action):

class AnalyzerS(UntypedS, AnalyzerMixin):
    """This is to give the search view support for setting the analyzer.

    This differs from Sphilastic in that this is a plain ES S object,
    not based on Django.

    All behavior comes from the two base classes; the class body is
    intentionally empty.
    """

def get_mappings():
mappings = {}

Expand Down Expand Up @@ -691,3 +731,24 @@ def es_analyzer_for_locale(locale, fallback="standard"):
analyzer = fallback

return analyzer

def es_query_with_analyzer(query, locale):
    """Transform a query dict to use _analyzer actions for the right fields.

    :arg query: dict mapping ``<field>__<action>`` keys to the text to
        search for
    :arg locale: locale used to choose the analyzer, via
        ``es_analyzer_for_locale``

    :returns: a new dict; ``query`` itself is not modified. For every key
        whose field is reported by some mapping type's
        ``get_localized_fields()``, the key gains an ``_analyzer`` suffix
        and the value becomes a ``(text, analyzer)`` two-tuple. Every
        other entry is copied unchanged.
    """
    analyzer = es_analyzer_for_locale(locale)

    # Import locally to avoid circular import
    # NOTE(review): the module path was not recoverable from this view;
    # ``kitsune.search.models`` is assumed -- confirm before merging.
    from kitsune.search.models import get_mapping_types

    localized_fields = set()
    for mt in get_mapping_types():
        localized_fields.update(mt.get_localized_fields())

    new_query = {}
    for k, v in query.items():
        # Only the field name (the part before the first ``__``) decides
        # whether the entry is upgraded. Keys without an action separator
        # are passed through untouched rather than raising on unpacking.
        field, sep, _ = k.partition('__')
        if sep and field in localized_fields:
            new_query[k + '_analyzer'] = (v, analyzer)
        else:
            new_query[k] = v

    return new_query
4 changes: 4 additions & 0 deletions kitsune/search/
Expand Up @@ -105,6 +105,10 @@ def get_query_fields(cls):
"""Return the list of fields for query"""
raise NotImplementedError

def get_localized_fields(cls):
    """Return the list of fields to search with a locale-aware analyzer.

    The default is an empty list, i.e. no field gets a locale-specific
    analyzer unless a subclass overrides this.
    """
    return []

def get_indexable(cls):
# Some models have a gazillion instances. So we want to go
Expand Down
18 changes: 18 additions & 0 deletions kitsune/search/tests/
Expand Up @@ -1149,6 +1149,24 @@ def test_analyzer_choices(self):
locale = doc['locale']
eq_(doc['_analyzer'], self.locale_data[locale]['analyzer'])

def test_query_analyzer_upgrader(self):
    """Localized fields get ``*_analyzer`` actions; other fields are kept.

    ``document_title`` entries are expected to be upgraded to
    ``(text, analyzer)`` tuples under an ``_analyzer`` key, while
    ``document_locale`` entries must come back unchanged.
    """
    analyzer = 'snowball-english'
    before = {
        'document_title__text': 'foo',
        'document_locale__text': 'bar',
        'document_title__text_phrase': 'baz',
        'document_locale__text_phrase': 'qux',
    }
    expected = {
        'document_title__text_analyzer': ('foo', analyzer),
        'document_locale__text': 'bar',
        'document_title__text_phrase_analyzer': ('baz', analyzer),
        'document_locale__text_phrase': 'qux',
    }
    actual = es_utils.es_query_with_analyzer(before, 'en-US')
    eq_(actual, expected)

def _check_locale_tokenization(self, locale, expected_tokens, p_tag=True):
Check that a given locale's document was tokenized correctly.
Expand Down
14 changes: 9 additions & 5 deletions kitsune/search/
Expand Up @@ -14,7 +14,6 @@

import bleach
import jinja2
from elasticutils import S as UntypedS
from elasticutils.utils import format_explanation
from mobility.decorators import mobile_template
from statsd import statsd
Expand All @@ -28,7 +27,7 @@
from import locale_or_default, clean_excerpt, ComposedList
from import es_utils
from import SearchForm
from import ES_EXCEPTIONS, Sphilastic, F
from import ES_EXCEPTIONS, F, AnalyzerS
from kitsune.sumo.utils import paginate, smart_int
from import documents_for
from import Document, DocumentMappingType
Expand Down Expand Up @@ -130,8 +129,8 @@ def search(request, template=None):

# We use a regular S here because we want to search across
# multiple doctypes.
searcher = (UntypedS().es(urls=settings.ES_URLS)
searcher = (AnalyzerS().es(urls=settings.ES_URLS)

wiki_f = F(model='wiki_document')
question_f = F(model='questions_question')
Expand Down Expand Up @@ -350,14 +349,16 @@ def search(request, template=None):
if cleaned_q:
query_fields = chain(*[cls.get_query_fields()
for cls in get_mapping_types()])

query = {}
# Create text and text_phrase queries for every field
# we want to search.
for field in query_fields:
for query_type in ['text', 'text_phrase']:
query['%s__%s' % (field, query_type)] = cleaned_q

# Transform the query to use locale aware analyzers.
query = es_utils.es_query_with_analyzer(query, language)

searcher = searcher.query(should=True, **query)

num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS)
Expand Down Expand Up @@ -525,6 +526,9 @@ def suggestions(request):
query = dict(('%s__text' % field, term)
for field in DocumentMappingType.get_query_fields())
# Upgrade the query to an analyzer-aware one.
query = es_utils.es_query_with_analyzer(query, locale)

wiki_s = (
Expand Down
4 changes: 2 additions & 2 deletions kitsune/
Expand Up @@ -585,10 +585,10 @@ def JINJA_CONFIG():
# Connection information for Elastic
ES_URLS = ['']
# Indexes for reading
ES_INDEXES = {'default': 'sumo-20130701'}
ES_INDEXES = {'default': 'sumo-20130723'}
# Indexes for indexing--set this to ES_INDEXES if you want to read from
# and write to the same index.
ES_WRITE_INDEXES = {'default': 'sumo-20130723'}
# This is prepended to index names to get the final read/write index
# names used by kitsune. This is so that you can have multiple
# environments pointed at the same ElasticSearch cluster and not have
Expand Down
9 changes: 9 additions & 0 deletions kitsune/wiki/
Expand Up @@ -674,6 +674,15 @@ def get_query_fields(cls):

def get_localized_fields(cls):
# This is the same list as `get_query_fields`, but it doesn't
# have to be, which is why it is typed twice.
return ['document_title',

def get_mapping(cls):
return {
Expand Down

0 comments on commit 0040e6b

Please sign in to comment.