Skip to content
This repository has been archived by the owner on Mar 15, 2018. It is now read-only.

Commit

Permalink
Update all ES queries to use Elasticsearch DSL (bug 1036168)
Browse files Browse the repository at this point in the history
  • Loading branch information
robhudson committed Jul 21, 2014
1 parent 63c1060 commit 01af1a1
Show file tree
Hide file tree
Showing 30 changed files with 409 additions and 335 deletions.
28 changes: 14 additions & 14 deletions .baked
Expand Up @@ -48,20 +48,20 @@
"django_extensions", "django_filters", "django_nose",
"django_qunit", "django_statsd", "django_tables", "djangoraven",
"djcelery", "dmemcache", "docopt", "docs", "docutils",
"easy_thumbnails", "elasticsearch", "email_reply_parser",
"encutils", "fabric", "fancy_tag", "fastchardet", "feedparser",
"fixture_magic", "flake8", "fudge", "funtests", "gflags",
"gflags_validators", "git", "glue", "google", "gunicorn",
"happyforms", "heka", "heka_cef", "heka_raven", "hera", "html5lib",
"httplib2", "importlib", "ipdb", "jingo", "jingo", "jingo_minify",
"jinja2", "json_field", "jwt", "kombu", "lxml", "m2secret",
"markupsafe", "mccabe", "mdn_theme", "memcache", "memcachepool",
"metlog", "metlog_cef", "metlog_raven", "mimeparse", "mock",
"monolith", "mozilla_logger", "mozpay", "multidb", "mysql_pool",
"mysql_pool", "mysqlmysql", "ndg", "nose", "nose", "oauth2",
"oauth2client", "oauthlib", "ordereddict", "path", "packager",
"paramiko", "pep8", "picklefield", "piston", "polib",
"product_details", "py", "pyasn1", "pyflakes", "pygments",
"easy_thumbnails", "elasticsearch", "elasticsearch_dsl",
"email_reply_parser", "encutils", "fabric", "fancy_tag",
"fastchardet", "feedparser", "fixture_magic", "flake8", "fudge",
"funtests", "gflags", "gflags_validators", "git", "glue", "google",
"gunicorn", "happyforms", "heka", "heka_cef", "heka_raven", "hera",
"html5lib", "httplib2", "importlib", "ipdb", "jingo", "jingo",
"jingo_minify", "jinja2", "json_field", "jwt", "kombu", "lxml",
"m2secret", "markupsafe", "mccabe", "mdn_theme", "memcache",
"memcachepool", "metlog", "metlog_cef", "metlog_raven",
"mimeparse", "mock", "monolith", "mozilla_logger", "mozpay",
"multidb", "mysql_pool", "mysql_pool", "mysqlmysql", "ndg", "nose",
"nose", "oauth2", "oauth2client", "oauthlib", "ordereddict",
"path", "packager", "paramiko", "pep8", "picklefield", "piston",
"polib", "product_details", "py", "pyasn1", "pyflakes", "pygments",
"pymemcache", "pymysql", "pyquery", "pytz", "queryset_transform",
"quieter_formset", "ratelimit", "raven", "rdflib", "receipts",
"redis", "redis", "redisutils", "requests", "rest_framework",
Expand Down
7 changes: 4 additions & 3 deletions apps/amo/monitors.py
Expand Up @@ -8,7 +8,7 @@
from django.conf import settings

import commonware.log
import elasticutils.contrib.django as elasticutils
import elasticsearch
import requests
from cache_nuggets.lib import memoize
from PIL import Image
Expand Down Expand Up @@ -105,14 +105,15 @@ def libraries():


def elastic():
es = elasticsearch.Elasticsearch(hosts=settings.ES_HOSTS)
elastic_results = None
status = ''
try:
health = elasticutils.get_es().health()
health = es.cluster.health()
if health['status'] == 'red':
status = 'ES is red'
elastic_results = health
except Exception:
except elasticsearch.ElasticsearchException:
monitor_log.exception('Failed to communicate with ES')
elastic_results = {'error': traceback.format_exc()}
status = 'traceback'
Expand Down
11 changes: 1 addition & 10 deletions apps/amo/tests/__init__.py
Expand Up @@ -21,7 +21,6 @@

import caching
import elasticsearch
import elasticutils.contrib.django as elasticutils
import mock
import test_utils
import tower
Expand All @@ -44,9 +43,6 @@
from mkt.access.acl import check_ownership
from mkt.access.models import Group, GroupUser
from mkt.constants import regions
from mkt.feed.indexers import (FeedAppIndexer, FeedBrandIndexer,
FeedCollectionIndexer, FeedItemIndexer,
FeedShelfIndexer)
from mkt.files.helpers import copyfileobj
from mkt.files.models import File, Platform
from mkt.prices.models import AddonPremium, Price, PriceCurrency
Expand All @@ -55,7 +51,6 @@
from mkt.translations.models import Translation
from mkt.users.models import UserProfile
from mkt.versions.models import Version
from mkt.webapps.indexers import WebappIndexer
from mkt.webapps.models import update_search_index as app_update_search_index
from mkt.webapps.models import Addon, Webapp
from mkt.webapps.tasks import unindex_webapps
Expand Down Expand Up @@ -175,8 +170,7 @@ def __getattr__(self, name):
raise AttributeError


ES_patchers = [mock.patch('elasticutils.contrib.django', spec=True),
mock.patch('elasticsearch.Elasticsearch'),
ES_patchers = [mock.patch('elasticsearch.Elasticsearch'),
mock.patch('mkt.webapps.tasks.WebappIndexer', spec=True),
mock.patch('mkt.webapps.tasks.Reindexing', spec=True,
side_effect=lambda i: [i])]
Expand All @@ -191,9 +185,6 @@ def stop_es_mock():
for patch in ES_patchers:
patch.stop()

if hasattr(elasticutils, '_local') and hasattr(elasticutils._local, 'es'):
delattr(elasticutils._local, 'es')

# Reset cached Elasticsearch objects.
BaseIndexer._es = {}

Expand Down
4 changes: 2 additions & 2 deletions apps/amo/utils.py
Expand Up @@ -34,14 +34,14 @@

import bleach
import chardet
import elasticutils.contrib.django as elasticutils
import html5lib
import jinja2
import pytz
from babel import Locale
from cef import log_cef as _log_cef
from django_statsd.clients import statsd
from easy_thumbnails import processors
from elasticsearch_dsl.search import Search
from html5lib.serializer.htmlserializer import HTMLSerializer
from jingo import env
from PIL import Image, ImageFile, PngImagePlugin
Expand Down Expand Up @@ -120,7 +120,7 @@ def paginate(request, queryset, per_page=20, count=None):
``.count()`` on the queryset. This can be good if the queryset would
produce an expensive count query.
"""
p = (ESPaginator if isinstance(queryset, elasticutils.S)
p = (ESPaginator if isinstance(queryset, Search)
else paginator.Paginator)(queryset, per_page)

if count is not None:
Expand Down
15 changes: 3 additions & 12 deletions docs/topics/install-zamboni/elasticsearch.rst
Expand Up @@ -82,20 +82,11 @@ If you need to use another settings file and add arguments::
Querying Elasticsearch in Django
--------------------------------

We use `elasticutils <http://github.com/mozilla/elasticutils>`_, a Python
library that provides a search API for Elasticsearch.

We attach elasticutils to Django models with a mixin. This lets us do things
like ``.search()`` which returns an object which acts a lot like Django's ORM's
object manager. ``.filter(**kwargs)`` can be run on this search object::

query_results = list(
MyModel.search().filter(my_field=a_str.lower())
.values_dict('that_field'))
We use `Elasticsearch DSL <https://github.com/elasticsearch/elasticsearch-dsl-py>`_,
a Python library that provides a search API for Elasticsearch.

On Marketplace, apps use ``mkt/webapps/indexers:WebappIndexer`` as their
interface to Elasticsearch. Search is done a little differently using
this and results are a list of ``WebappIndexer`` objects::
interface to Elasticsearch::

query_results = WebappIndexer.search().filter(...)

Expand Down
3 changes: 1 addition & 2 deletions lib/log_settings_base.py
Expand Up @@ -3,7 +3,6 @@

from django.conf import settings

from raven.contrib.django.handlers import SentryHandler
import commonware.log
import dictconfig

Expand Down Expand Up @@ -80,7 +79,7 @@
'newrelic': {
'level': 'WARNING',
},
'elasticutils': {
'elasticsearch': {
'level': 'WARNING',
},
'suds': {
Expand Down
12 changes: 5 additions & 7 deletions mkt/api/paginator.py
Expand Up @@ -45,13 +45,11 @@ def page(self, number):

# Force the search to evaluate and then attach the count. We want to
# avoid an extra useless query even if there are no results, so we
# directly fetch the count from _results_cache instead of calling
# page.object_list.count().
# FIXME: replace by simply calling page.object_list.count() when
# https://github.com/mozilla/elasticutils/pull/212 is merged and
# released.
page.object_list.execute()
self._count = page.object_list._results_cache.count
# directly fetch the count from hits.
# Overwrite `object_list` with the list of ES results.
page.object_list = page.object_list.execute().hits
# Update the `_count`.
self._count = page.object_list.total

return page

Expand Down
30 changes: 18 additions & 12 deletions mkt/api/tests/test_paginator.py
Expand Up @@ -3,27 +3,33 @@
from django.core.paginator import Paginator
from django.http import QueryDict

import mock
from elasticutils.contrib.django import S
from nose.tools import eq_
from test_utils import RequestFactory

from amo.tests import TestCase
from amo.tests import ESTestCase, TestCase

from mkt.api.paginator import MetaSerializer, ESPaginator
from mkt.webapps.indexers import WebappIndexer


class TestSearchPaginator(TestCase):
class TestSearchPaginator(ESTestCase):

# TODO: When we update searching update this also.
# @mock.patch('elasticsearch.connection.http_requests'
# '.RequestsHttpConnection.perform_request')
@mock.patch('pyelasticsearch.client.ElasticSearch.send_request')
def test_single_hit(self, _mock):
"""Test the ES paginator only queries ES one time."""
ESPaginator(S(WebappIndexer), 5).object_list.execute()
eq_(_mock.call_count, 1)
def test_single_hit(self):
"""Test the ESPaginator only queries ES one time."""
es = WebappIndexer.get_es()
orig_search = es.search
es.counter = 0

def monkey_search(*args, **kwargs):
es.counter += 1
return orig_search(*args, **kwargs)

es.search = monkey_search

ESPaginator(WebappIndexer.search(), 5).object_list.execute()
eq_(es.counter, 1)

es.search = orig_search


class TestMetaSerializer(TestCase):
Expand Down
18 changes: 10 additions & 8 deletions mkt/collections/serializers.py
Expand Up @@ -44,11 +44,12 @@ class CollectionMembershipField(serializers.RelatedField):
def to_native(self, qs, use_es=False):
if use_es:
serializer_class = self.app_serializer_classes['es']
# To work around elasticsearch default limit of 10, hardcode a
# higher limit.
qs = qs[:100].execute()
else:
serializer_class = self.app_serializer_classes['normal']
# To work around elasticsearch default limit of 10, hardcode a higher
# limit.
return serializer_class(qs[:100], context=self.context, many=True).data
return serializer_class(qs, context=self.context, many=True).data

def _get_device(self, request):
# Fireplace sends `dev` and `device`. See the API docs. When
Expand All @@ -63,7 +64,7 @@ def _get_device(self, request):
return amo.DEVICE_LOOKUP.get(dev)

def field_to_native(self, obj, field_name):
if not hasattr(self, 'context') or not 'request' in self.context:
if not hasattr(self, 'context') or 'request' not in self.context:
raise ImproperlyConfigured('Pass request in self.context when'
' using CollectionMembershipField.')

Expand Down Expand Up @@ -105,12 +106,13 @@ def field_to_native_es(self, obj, request):
_rget = lambda d: getattr(request, d, False)
qs = Webapp.from_search(request, region=region, gaia=_rget('GAIA'),
mobile=_rget('MOBILE'), tablet=_rget('TABLET'))
filters = {'collection.id': obj.pk}
qs = qs.filter('term', **{'collection.id': obj.pk})
if device and device != amo.DEVICE_DESKTOP:
filters['device'] = device.id
qs = qs.filter('term', device=device.id)
if profile:
filters.update(**profile.to_kwargs(prefix='features.has_'))
qs = qs.filter(**filters).order_by({
for k, v in profile.to_kwargs(prefix='features.has_').items():
qs = qs.filter('term', **{k: v})
qs = qs.sort({
'collection.order': {
'order': 'asc',
'nested_filter': {
Expand Down
46 changes: 26 additions & 20 deletions mkt/feed/views.py
@@ -1,6 +1,6 @@
from django.db.models import Q

from elasticutils.contrib.django import S
from elasticsearch_dsl import query
from rest_framework import response, status, viewsets
from rest_framework.exceptions import ParseError
from rest_framework.filters import BaseFilterBackend, OrderingFilter
Expand Down Expand Up @@ -297,25 +297,31 @@ def _phrase(self, q):
def get(self, request, *args, **kwargs):
q = request.GET.get('q')

# Gather.
query = {
'slug__match': self._phrase(q),
'type__match': self._phrase(q),
'should': True
}

feed_app_ids = ([pk[0] for pk in S(FeedAppIndexer).query(
search_names__fuzzy=q, **query).values_list('id')])

feed_brand_ids = [pk[0] for pk in S(FeedBrandIndexer).query(
**query).values_list('id')]

feed_collection_ids = ([pk[0] for pk in S(FeedCollectionIndexer).query(
search_names__fuzzy=q, **query).values_list('id')])

feed_shelf_ids = ([pk[0] for pk in S(FeedShelfIndexer).query(
search_names__fuzzy=q, slug__fuzzy=q, carrier__prefix=q, region=q,
should=True).values_list('id')])
match_query = (
query.Q('match', slug=self._phrase(q)),
query.Q('match', type=self._phrase(q)),
)
fuzzy_query = match_query + (query.Q('fuzzy', search_names=q),)

feed_app_ids = [
hit.id for hit in FeedAppIndexer.search().query(
query.Bool(should=fuzzy_query)).execute().hits]

feed_brand_ids = [
hit.id for hit in FeedBrandIndexer.search().query(
query.Bool(should=match_query)).execute().hits]

feed_collection_ids = [
hit.id for hit in FeedCollectionIndexer.search().query(
query.Bool(should=fuzzy_query)).execute().hits]

feed_shelf_ids = [
hit.id for hit in FeedShelfIndexer.search().query(
query.Bool(should=(
query.Q('fuzzy', search_names=q),
query.Q('fuzzy', slug=q),
query.Q('prefix', carrier=q),
query.Q('term', region=q)))).execute().hits]

# Dehydrate.
apps = FeedApp.objects.filter(id__in=feed_app_ids)
Expand Down
4 changes: 2 additions & 2 deletions mkt/fireplace/tests/test_views.py
Expand Up @@ -4,6 +4,7 @@
from django.core.urlresolvers import reverse
from django.db.models.query import QuerySet

from elasticsearch_dsl.search import Search
from mock import patch
from nose.tools import eq_, ok_
from test_utils import RequestFactory
Expand All @@ -15,7 +16,6 @@
from mkt.collections.constants import COLLECTIONS_TYPE_BASIC
from mkt.collections.models import Collection
from mkt.fireplace.serializers import FireplaceAppSerializer
from mkt.search.utils import S
from mkt.site.fixtures import fixture
from mkt.webapps.models import AddonUser, Installed, Webapp
from mkt.users.models import UserProfile
Expand Down Expand Up @@ -145,7 +145,7 @@ def test_no_get_preview(self, mock_field_to_native):
eq_(res.json['apps'], [])

eq_(mock_field_to_native.call_count, 1)
ok_(isinstance(mock_field_to_native.call_args[0][0], S))
ok_(isinstance(mock_field_to_native.call_args[0][0], Search))
eq_(mock_field_to_native.call_args[1].get('use_es', False), True)


Expand Down
1 change: 1 addition & 0 deletions mkt/lookup/tests/test_views.py
Expand Up @@ -651,6 +651,7 @@ def verify_result(self, data):
eq_(data['results'][0]['id'], self.app.pk)
eq_(data['results'][0]['url'], reverse('lookup.app_summary',
args=[self.app.pk]))
eq_(data['results'][0]['app_slug'], self.app.app_slug)

def test_by_name_part(self):
self.app.name = 'This is Steamcube'
Expand Down

0 comments on commit 01af1a1

Please sign in to comment.