Skip to content

Commit 0040e6b

Browse files
committed
[Bug 894686] Step 2 - Search Documents by locale.
1 parent 1212c97 commit 0040e6b

File tree

7 files changed

+107
-9
lines changed

7 files changed

+107
-9
lines changed

kitsune/questions/views.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@
5252
from kitsune.questions.models import (
5353
Question, Answer, QuestionVote, AnswerVote, QuestionMappingType)
5454
from kitsune.questions.question_config import products
55-
from kitsune.search.es_utils import ES_EXCEPTIONS, Sphilastic, F
55+
from kitsune.search.es_utils import (ES_EXCEPTIONS, Sphilastic, F,
56+
es_query_with_analyzer)
5657
from kitsune.search.utils import locale_or_default, clean_excerpt
5758
from kitsune.sumo.helpers import urlparams
5859
from kitsune.sumo.urlresolvers import reverse
@@ -1406,6 +1407,7 @@ def _search_suggestions(request, text, locale, product_slugs):
14061407
for field in DocumentMappingType.get_query_fields())
14071408
query.update(dict(('%s__text_phrase' % field, text)
14081409
for field in DocumentMappingType.get_query_fields()))
1410+
query = es_query_with_analyzer(query, locale)
14091411
filter = F()
14101412
filter |= F(document_locale=locale)
14111413
filter |= F(document_locale=settings.WIKI_DEFAULT_LANGUAGE)

kitsune/search/es_utils.py

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from django.db import reset_queries
88

99
import requests
10+
from elasticutils import S as UntypedS
1011
from elasticutils.contrib.django import S, F, get_es, ES_EXCEPTIONS # noqa
1112
from pyelasticsearch.exceptions import ElasticHttpNotFoundError
1213

@@ -45,7 +46,37 @@ class UnindexMeBro(Exception):
4546
pass
4647

4748

48-
class Sphilastic(S):
49+
class AnalyzerMixin(object):
    """Mixin providing analyzer-aware ``text``/``text_phrase`` actions."""

    def _with_analyzer(self, key, val, action):
        """Build a normal query clause with an analyzer attached.

        :arg key: the field being searched
        :arg val: a two-tuple of the text to query for and the name of
            the analyzer to use
        :arg action: the type of query being performed, like ``text``
            or ``text_phrase``

        :returns: a nested dict of the form
            ``{action: {key: {'query': ..., 'analyzer': ...}}}``
        """
        text, analyzer_name = val
        clause = {
            'query': text,
            'analyzer': analyzer_name,
        }
        return {action: {key: clause}}

    def process_query_text_phrase_analyzer(self, key, val, action):
        """A text phrase query that includes an analyzer."""
        # The incoming ``action`` is not used: the clause is always
        # emitted under the plain ``text_phrase`` query type.
        return self._with_analyzer(key, val, 'text_phrase')

    def process_query_text_analyzer(self, key, val, action):
        """A text query that includes an analyzer."""
        # The incoming ``action`` is not used: the clause is always
        # emitted under the plain ``text`` query type.
        return self._with_analyzer(key, val, 'text')
77+
78+
79+
class Sphilastic(S, AnalyzerMixin):
4980
"""Shim around elasticutils.contrib.django.S.
5081
5182
Implements some Kitsune-specific behavior to make our lives
@@ -79,6 +110,15 @@ def process_query_mlt(self, key, val, action):
79110
}
80111

81112

113+
class AnalyzerS(UntypedS, AnalyzerMixin):
    """Plain ES ``S`` object with the analyzer-aware query actions mixed in.

    This gives the search view support for setting the analyzer. It
    differs from Sphilastic in that it is built on the untyped
    ``elasticutils.S`` rather than on the Django-based one.
    """
120+
121+
82122
def get_mappings():
83123
mappings = {}
84124

@@ -691,3 +731,24 @@ def es_analyzer_for_locale(locale, fallback="standard"):
691731
analyzer = fallback
692732

693733
return analyzer
734+
735+
736+
def es_query_with_analyzer(query, locale):
    """Transform a query dict to use _analyzer actions for the right fields.

    :arg query: dict mapping ``<field>__<action>`` keys to the text to
        search for
    :arg locale: locale used to pick the analyzer (via
        ``es_analyzer_for_locale``)

    :returns: a new dict; ``query`` itself is left untouched. Entries for
        localized fields using the ``text`` or ``text_phrase`` action are
        rewritten to ``<field>__<action>_analyzer`` with a
        ``(text, analyzer)`` tuple as value. Every other entry is copied
        through unchanged.
    """
    analyzer = es_analyzer_for_locale(locale)

    # Only these actions have a matching ``process_query_*_analyzer``
    # handler on AnalyzerMixin; suffixing any other action would produce
    # an action name nothing handles.
    upgradable_actions = ('text', 'text_phrase')

    # Import locally to avoid circular import
    from kitsune.search.models import get_mapping_types
    localized_fields = set()
    for mt in get_mapping_types():
        localized_fields.update(mt.get_localized_fields())

    new_query = {}
    for k, v in query.items():
        # Split on the last ``__`` only, so keys without an action (or
        # with extra double underscores) pass through instead of raising
        # ValueError on tuple unpacking.
        field, sep, action = k.rpartition('__')
        if sep and action in upgradable_actions and field in localized_fields:
            new_query[k + '_analyzer'] = (v, analyzer)
        else:
            new_query[k] = v

    return new_query

kitsune/search/models.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,10 @@ def get_query_fields(cls):
105105
"""Return the list of fields for query"""
106106
raise NotImplementedError
107107

108+
@classmethod
def get_localized_fields(cls):
    """Return the list of fields that should use a locale's analyzer.

    The default is an empty list, meaning no field of this mapping type
    is treated as localized.
    """
    return []
111+
108112
@classmethod
109113
def get_indexable(cls):
110114
# Some models have a gazillion instances. So we want to go

kitsune/search/tests/test_es.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1149,6 +1149,24 @@ def test_analyzer_choices(self):
11491149
locale = doc['locale']
11501150
eq_(doc['_analyzer'], self.locale_data[locale]['analyzer'])
11511151

1152+
def test_query_analyzer_upgrader(self):
    """Title queries gain ``_analyzer`` actions; locale queries do not."""
    analyzer_name = 'snowball-english'
    plain_query = {
        'document_title__text': 'foo',
        'document_title__text_phrase': 'baz',
        'document_locale__text': 'bar',
        'document_locale__text_phrase': 'qux',
    }
    upgraded_query = {
        'document_title__text_analyzer': ('foo', analyzer_name),
        'document_title__text_phrase_analyzer': ('baz', analyzer_name),
        # document_locale entries are expected to come back unchanged.
        'document_locale__text': 'bar',
        'document_locale__text_phrase': 'qux',
    }
    eq_(es_utils.es_query_with_analyzer(plain_query, 'en-US'),
        upgraded_query)
1168+
1169+
11521170
def _check_locale_tokenization(self, locale, expected_tokens, p_tag=True):
11531171
"""
11541172
Check that a given locale's document was tokenized correctly.

kitsune/search/views.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
import bleach
1616
import jinja2
17-
from elasticutils import S as UntypedS
1817
from elasticutils.utils import format_explanation
1918
from mobility.decorators import mobile_template
2019
from statsd import statsd
@@ -28,7 +27,7 @@
2827
from kitsune.search.utils import locale_or_default, clean_excerpt, ComposedList
2928
from kitsune.search import es_utils
3029
from kitsune.search.forms import SearchForm
31-
from kitsune.search.es_utils import ES_EXCEPTIONS, Sphilastic, F
30+
from kitsune.search.es_utils import ES_EXCEPTIONS, F, AnalyzerS
3231
from kitsune.sumo.utils import paginate, smart_int
3332
from kitsune.wiki.facets import documents_for
3433
from kitsune.wiki.models import Document, DocumentMappingType
@@ -130,8 +129,8 @@ def search(request, template=None):
130129

131130
# We use a regular S here because we want to search across
132131
# multiple doctypes.
133-
searcher = (UntypedS().es(urls=settings.ES_URLS)
134-
.indexes(es_utils.READ_INDEX))
132+
searcher = (AnalyzerS().es(urls=settings.ES_URLS)
133+
.indexes(es_utils.READ_INDEX))
135134

136135
wiki_f = F(model='wiki_document')
137136
question_f = F(model='questions_question')
@@ -350,14 +349,16 @@ def search(request, template=None):
350349
if cleaned_q:
351350
query_fields = chain(*[cls.get_query_fields()
352351
for cls in get_mapping_types()])
353-
354352
query = {}
355353
# Create text and text_phrase queries for every field
356354
# we want to search.
357355
for field in query_fields:
358356
for query_type in ['text', 'text_phrase']:
359357
query['%s__%s' % (field, query_type)] = cleaned_q
360358

359+
# Transform the query to use locale aware analyzers.
360+
query = es_utils.es_query_with_analyzer(query, language)
361+
361362
searcher = searcher.query(should=True, **query)
362363

363364
num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS)
@@ -525,6 +526,9 @@ def suggestions(request):
525526
try:
526527
query = dict(('%s__text' % field, term)
527528
for field in DocumentMappingType.get_query_fields())
529+
# Upgrade the query to an analyzer-aware one.
530+
query = es_utils.es_query_with_analyzer(query, locale)
531+
528532
wiki_s = (DocumentMappingType.search()
529533
.filter(document_is_archived=False)
530534
.filter(document_locale=locale)

kitsune/settings.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -585,10 +585,10 @@ def JINJA_CONFIG():
585585
# Connection information for Elastic
586586
ES_URLS = ['http://127.0.0.1:9200']
587587
# Indexes for reading
588-
ES_INDEXES = {'default': 'sumo-20130701'}
588+
ES_INDEXES = {'default': 'sumo-20130723'}
589589
# Indexes for indexing--set this to ES_INDEXES if you want to read to
590590
# and write to the same index.
591-
ES_WRITE_INDEXES = {'default': 'sumo-20130723'}
591+
ES_WRITE_INDEXES = ES_INDEXES
592592
# This is prepended to index names to get the final read/write index
593593
# names used by kitsune. This is so that you can have multiple
594594
# environments pointed at the same ElasticSearch cluster and not have

kitsune/wiki/models.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -674,6 +674,15 @@ def get_query_fields(cls):
674674
'document_summary',
675675
'document_keywords']
676676

677+
@classmethod
def get_localized_fields(cls):
    """Return the document fields that should use a locale's analyzer."""
    # This currently matches the list in `get_query_fields`, but the two
    # are allowed to diverge, so it is spelled out separately here.
    localized_fields = [
        'document_title',
        'document_content',
        'document_summary',
        'document_keywords',
    ]
    return localized_fields
685+
677686
@classmethod
678687
def get_mapping(cls):
679688
return {

0 commit comments

Comments
 (0)