Skip to content
Browse files

[Bug 894686] Step 2 - Search Documents by locale.

  • Loading branch information...
mythmon committed Jul 25, 2013
1 parent 1212c97 commit 0040e6b2db03c9d4e894cda13f10cd27537414b5
@@ -52,7 +52,8 @@
from kitsune.questions.models import (
Question, Answer, QuestionVote, AnswerVote, QuestionMappingType)
from kitsune.questions.question_config import products
from import ES_EXCEPTIONS, Sphilastic, F
from import (ES_EXCEPTIONS, Sphilastic, F,
from import locale_or_default, clean_excerpt
from kitsune.sumo.helpers import urlparams
from kitsune.sumo.urlresolvers import reverse
@@ -1406,6 +1407,7 @@ def _search_suggestions(request, text, locale, product_slugs):
for field in DocumentMappingType.get_query_fields())
query.update(dict(('%s__text_phrase' % field, text)
for field in DocumentMappingType.get_query_fields()))
query = es_query_with_analyzer(query, locale)
filter = F()
filter |= F(document_locale=locale)
filter |= F(document_locale=settings.WIKI_DEFAULT_LANGUAGE)
@@ -7,6 +7,7 @@
from django.db import reset_queries

import requests
from elasticutils import S as UntypedS
from elasticutils.contrib.django import S, F, get_es, ES_EXCEPTIONS # noqa
from pyelasticsearch.exceptions import ElasticHttpNotFoundError

@@ -45,7 +46,37 @@ class UnindexMeBro(Exception):

class Sphilastic(S):
class AnalyzerMixin(object):

def _with_analyzer(self, key, val, action):
"""Do a normal kind of query, with an analyzer added.
:arg key: is the field being searched
:arg val: is a two-tuple of the text to query for and the name of
the analyzer to use.
:arg action: is the type of query being performed, like text or
query, analyzer = val
return {
action: {
key: {
'query': query,
'analyzer': analyzer,

def process_query_text_phrase_analyzer(self, key, val, action):
"""A text phrase query that includes an analyzer.

Delegates to ``_with_analyzer`` with the action fixed to
``'text_phrase'``; the ``action`` argument received here is not used.

:arg key: the field being searched
:arg val: passed through unchanged to ``_with_analyzer``, which
unpacks it as ``(query, analyzer)``
:arg action: ignored by this method
"""
return self._with_analyzer(key, val, 'text_phrase')

def process_query_text_analyzer(self, key, val, action):
"""A text query that includes an analyzer.

Delegates to ``_with_analyzer`` with the action fixed to ``'text'``;
the ``action`` argument received here is not used.

:arg key: the field being searched
:arg val: passed through unchanged to ``_with_analyzer``, which
unpacks it as ``(query, analyzer)``
:arg action: ignored by this method
"""
return self._with_analyzer(key, val, 'text')

class Sphilastic(S, AnalyzerMixin):
"""Shim around elasticutils.contrib.django.S.
Implements some Kitsune-specific behavior to make our lives
@@ -79,6 +110,15 @@ def process_query_mlt(self, key, val, action):

class AnalyzerS(UntypedS, AnalyzerMixin):
"""This is to give the search view support for setting the analyzer.
This differs from Sphilastic in that this is a plain ES S object,
not based on Django.

def get_mappings():
mappings = {}

@@ -691,3 +731,24 @@ def es_analyzer_for_locale(locale, fallback="standard"):
analyzer = fallback

return analyzer

def es_query_with_analyzer(query, locale):
"""Transform a query dict to use _analyzer actions for the right fields.

:arg query: dict whose keys have the form ``<field>__<action>``
:arg locale: locale handed to ``es_analyzer_for_locale`` to choose the
analyzer name

:returns: a new dict; the input dict is not modified
"""
analyzer = es_analyzer_for_locale(locale)
new_query = {}

# Import locally to avoid circular import
from import get_mapping_types
localized_fields = []
for mt in get_mapping_types():
# NOTE(review): the loop body is not visible in this view; presumably it
# collects each mapping type's get_localized_fields() into
# localized_fields -- confirm against the real file.

for k, v in query.items():
# Each key must contain exactly one '__' separator; otherwise the
# two-name unpacking below raises ValueError.
field, action = k.split('__')
if field in localized_fields:
# Localized fields are re-keyed to the '_analyzer' action and carry
# the analyzer name alongside the query text.
new_query[k + '_analyzer'] = (v, analyzer)
# NOTE(review): as shown, this assignment also runs for localized fields,
# yet test_query_analyzer_upgrader expects those fields to appear only
# under the '_analyzer' key. Presumably an `else:` belongs before this
# line -- confirm against the real file.
new_query[k] = v

return new_query
@@ -105,6 +105,10 @@ def get_query_fields(cls):
"""Return the list of fields for query"""
raise NotImplementedError

def get_localized_fields(cls):
"""Return the list of localized fields; this implementation has none.

NOTE(review): presumably overridden by mapping types that have fields
which should be searched with a locale-specific analyzer -- confirm
against the callers.
"""
return []

def get_indexable(cls):
# Some models have a gazillion instances. So we want to go
@@ -1149,6 +1149,24 @@ def test_analyzer_choices(self):
locale = doc['locale']
eq_(doc['_analyzer'], self.locale_data[locale]['analyzer'])

def test_query_analyzer_upgrader(self):
analyzer = 'snowball-english'
before = {
'document_title__text': 'foo',
'document_locale__text': 'bar',
'document_title__text_phrase': 'baz',
'document_locale__text_phrase': 'qux'
expected = {
'document_title__text_analyzer': ('foo', analyzer),
'document_locale__text': 'bar',
'document_title__text_phrase_analyzer': ('baz', analyzer),
'document_locale__text_phrase': 'qux',
actual = es_utils.es_query_with_analyzer(before, 'en-US')
eq_(actual, expected)

def _check_locale_tokenization(self, locale, expected_tokens, p_tag=True):
Check that a given locale's document was tokenized correctly.
@@ -14,7 +14,6 @@

import bleach
import jinja2
from elasticutils import S as UntypedS
from elasticutils.utils import format_explanation
from mobility.decorators import mobile_template
from statsd import statsd
@@ -28,7 +27,7 @@
from import locale_or_default, clean_excerpt, ComposedList
from import es_utils
from import SearchForm
from import ES_EXCEPTIONS, Sphilastic, F
from import ES_EXCEPTIONS, F, AnalyzerS
from kitsune.sumo.utils import paginate, smart_int
from import documents_for
from import Document, DocumentMappingType
@@ -130,8 +129,8 @@ def search(request, template=None):

# We use a regular S here because we want to search across
# multiple doctypes.
searcher = (UntypedS().es(urls=settings.ES_URLS)
searcher = (AnalyzerS().es(urls=settings.ES_URLS)

wiki_f = F(model='wiki_document')
question_f = F(model='questions_question')
@@ -350,14 +349,16 @@ def search(request, template=None):
if cleaned_q:
query_fields = chain(*[cls.get_query_fields()
for cls in get_mapping_types()])

query = {}
# Create text and text_phrase queries for every field
# we want to search.
for field in query_fields:
for query_type in ['text', 'text_phrase']:
query['%s__%s' % (field, query_type)] = cleaned_q

# Transform the query to use locale aware analyzers.
query = es_utils.es_query_with_analyzer(query, language)

searcher = searcher.query(should=True, **query)

num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS)
@@ -525,6 +526,9 @@ def suggestions(request):
query = dict(('%s__text' % field, term)
for field in DocumentMappingType.get_query_fields())
# Upgrade the query to an analyzer-aware one.
query = es_utils.es_query_with_analyzer(query, locale)

wiki_s = (
@@ -585,10 +585,10 @@ def JINJA_CONFIG():
# Connection information for Elastic
ES_URLS = ['']
# Indexes for reading
ES_INDEXES = {'default': 'sumo-20130701'}
ES_INDEXES = {'default': 'sumo-20130723'}
# Indexes for indexing--set this to ES_INDEXES if you want to read from
# and write to the same index.
ES_WRITE_INDEXES = {'default': 'sumo-20130723'}
# This is prepended to index names to get the final read/write index
# names used by kitsune. This is so that you can have multiple
# environments pointed at the same ElasticSearch cluster and not have
@@ -674,6 +674,15 @@ def get_query_fields(cls):

def get_localized_fields(cls):
# This is the same list as `get_query_fields`, but it doesn't
# have to be, which is why it is typed twice.
return ['document_title',

def get_mapping(cls):
return {

0 comments on commit 0040e6b

Please sign in to comment.
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.