Removed haystack

mitodl · Oct 8, 2015 · 006a743 · 006a743
1 parent c6c889e
commit 006a743
Show file tree

Hide file tree

Showing 8 changed files with 69 additions and 187 deletions.
diff --git a/learningresources/tests/base.py b/learningresources/tests/base.py
@@ -15,11 +15,9 @@
 from django.conf import settings
 from django.contrib.auth.models import User, Permission
 from django.core.files.storage import default_storage
-from django.core.management import call_command
 from django.test import Client
 from django.test.testcases import TestCase
 from django.test.utils import override_settings
-import haystack
 
 from learningresources.api import (
     create_repo,
@@ -28,7 +26,7 @@
     update_description_path
 )
 from learningresources.models import Repository, StaticAsset
-from search.utils import recreate_index, refresh_index
+from search.utils import recreate_index, refresh_index, remove_index
 
 log = logging.getLogger(__name__)
 # Using the md5 hasher speeds up tests.
@@ -131,11 +129,7 @@ def tearDown(self):
         """Clean up Elasticsearch and static assets between tests."""
         for static_asset in StaticAsset.objects.all():
             default_storage.delete(static_asset.asset)
-        for key, _ in haystack.connections.connections_info.items():
-            haystack.connections.reload(key)
-        call_command('clear_index', interactive=False, verbosity=0)
-        recreate_index()
-        refresh_index()
+        remove_index()
 
     def _make_archive(self, path, make_zip=False, ext=None):
         """

diff --git a/lore/settings.py b/lore/settings.py
@@ -95,7 +95,6 @@ def get_var(name, default):
     'taxonomy',
     'rest',
     'rest_framework',
-    'haystack',
     'search',
     'roles',
     'xanalytics',
@@ -385,15 +384,10 @@ def get_var(name, default):
 # Haystack
 HAYSTACK_CONNECTIONS = {
     'default': {
-        'ENGINE': (
-            'haystack.backends.elasticsearch_backend'
-            '.ElasticsearchSearchEngine'
-        ),
         'URL': get_var('HAYSTACK_URL', 'http://127.0.0.1:9200'),
         'INDEX_NAME': get_var('HAYSTACK_INDEX', 'haystack'),
     }
 }
-HAYSTACK_SIGNAL_PROCESSOR = 'search.signals.LoreRealTimeSignalProcessor'
 
 XANALYTICS_URL = get_var('XANALYTICS_URL', "")
 
@@ -414,3 +408,7 @@ def get_var(name, default):
 
 # Google analytics code
 GOOGLE_ANALYTICS_ID = get_var('LORE_GOOGLE_ANALYTICS_ID', None)
+
+# This is needed to connect the signals properly.
+# pylint: disable=unused-import
+import search.signals  # noqa
diff --git a/requirements.txt b/requirements.txt
@@ -6,7 +6,6 @@ django-bootstrap3==6.2.2
 django-compressor==1.5
 django-debug-toolbar==1.3.2
 django-elasticsearch-debug-toolbar==0.1.15
-django-haystack==2.4.0
 djangorestframework==3.2.2
 dj-database-url==0.3.0
 dj-static==0.0.6

diff --git a/search/api.py b/search/api.py
@@ -4,8 +4,6 @@
 
 from __future__ import unicode_literals
 
-from haystack.query import SearchQuerySet
-
 from search.sorting import LoreSortingFields
 from search.utils import search_index
 from taxonomy.models import Vocabulary, Term
@@ -29,28 +27,16 @@ def construct_queryset(repo_slug, query='', selected_facets=None, sortby=''):
     if selected_facets is None:
         selected_facets = []
 
-    queryset = SearchQuerySet()
-
     kwargs = {}
-    for facet in selected_facets:
-        queryset = queryset.narrow(facet)
-
     if query != "":
         kwargs["content"] = query
 
-    queryset = queryset.filter(**kwargs)
-
-    queryset = queryset.narrow("repository_exact:{slug}".format(
-        slug=repo_slug))
-
     if sortby == "":
         sortby = LoreSortingFields.DEFAULT_SORTING_FIELD
     # default values in case of weird sorting options
     sortby, _, order_direction = LoreSortingFields.get_sorting_option(
         sortby)
     sortby = "{0}{1}".format(order_direction, sortby)
-    queryset = queryset.order_by(
-        sortby, LoreSortingFields.BASE_SORTING_FIELD)
 
     # Do a parallel query using elasticsearch-dsl.
     if query not in ("", None):

diff --git a/search/search_indexes.py b/search/search_indexes.py
@@ -18,10 +18,8 @@
 import logging
 
 from django.core.cache import caches
-from haystack import indexes
-from lxml import etree
 
-from learningresources.models import Course, LearningResource, get_preview_url
+from learningresources.models import Course, LearningResource
 
 log = logging.getLogger(__name__)
 
@@ -64,120 +62,3 @@ def get_vocabs(resource_id):
     for rel in rels.iterator():
         data[rel.term.vocabulary_id].append(rel.term_id)
     return dict(data)
-
-
-class LearningResourceIndex(indexes.SearchIndex, indexes.Indexable):
-    """
-    Index configuration for the LearningResource model.
-    """
-    text = indexes.CharField(document=True)
-    resource_type = indexes.CharField(faceted=True)
-    course = indexes.CharField(faceted=True)
-    run = indexes.CharField(faceted=True)
-
-    # repository is here for filtering the repository listing page by the
-    # repo_slug in the URL. It is not used or needed in the repository listing
-    # page because that page is always for a single repository.
-    repository = indexes.CharField(faceted=True)
-
-    nr_views = indexes.IntegerField(model_attr="xa_nr_views")
-    nr_attempts = indexes.IntegerField(model_attr="xa_nr_attempts")
-    avg_grade = indexes.FloatField(model_attr="xa_avg_grade")
-
-    lid = indexes.IntegerField(model_attr="id", indexed=False)
-    title = indexes.CharField(model_attr="title", indexed=False)
-    titlesort = indexes.CharField(indexed=False)
-    description = indexes.CharField(model_attr="description", indexed=False)
-    description_path = indexes.CharField(
-        model_attr="description_path",
-        indexed=False,
-    )
-    preview_url = indexes.CharField(indexed=False)
-
-    def get_model(self):
-        """Return the model for which this configures indexing."""
-        return LearningResource
-
-    def index_queryset(self, using=None):
-        """Records to check when updating entire index."""
-        return self.get_model().objects.all()
-
-    def prepare_text(self, obj):  # pylint: disable=no-self-use
-        """Indexing of the primary content of a LearningResource."""
-        try:
-            # Strip XML tags from content before indexing.
-            tree = etree.fromstring(obj.content_xml)
-            content = etree.tostring(tree, encoding="utf-8", method="text")
-        except etree.XMLSyntaxError:
-            # For blank/invalid XML.
-            content = obj.content_xml
-        try:
-            content = content.decode('utf-8')
-        except AttributeError:
-            # For Python 3.
-            pass
-
-        return "{0} {1} {2}".format(
-            obj.title, obj.description, content
-        )
-
-    def prepare_run(self, obj):  # pylint: disable=no-self-use
-        """Define what goes into the "run" index."""
-        data = get_course_metadata(obj.course_id)
-        return data["run"]
-
-    def prepare_preview_url(self, obj):  # pylint: disable=no-self-use
-        """Define what goes into the "run" index."""
-        data = get_course_metadata(obj.course_id)
-        return get_preview_url(
-            obj,
-            org=data["org"],
-            course_number=data["course_number"],
-            run=data["run"],
-        )
-
-    def prepare_titlesort(self, obj):  # pylint: disable=no-self-use
-        """Define what goes into the "titlesort" index."""
-        title = obj.title.strip()
-        # hack to handle empty titles
-        # to show up at the bottom of the sorted list
-        if title:
-            title = '0{0}'.format(title)
-            return title.lower()
-        return '1'
-
-    @staticmethod
-    def prepare_course(obj):
-        """Define what goes into the "course" index."""
-        data = get_course_metadata(obj.course_id)
-        return data["course_number"]
-
-    @staticmethod
-    def prepare_resource_type(obj):
-        """The name of the LearningResourceType."""
-        return obj.learning_resource_type.name
-
-    def prepare(self, obj):
-        """
-        Get dynamic vocabularies.
-
-        The prepare() method runs last, similar to Django's form.clean().
-        This allows us to override anything we want. Here we add vocabularies
-        to the index because they must be dynamic.
-
-        Technically, we could handle the other stuff (run, course, etc.) here
-        as well, but don't because explicit is better than implicit.
-        """
-        prepared = super(LearningResourceIndex, self).prepare(obj)
-        for vocab_id, term_ids in get_vocabs(obj.id).items():
-            # Use the integer primary keys as index values. This saves space,
-            # and also avoids all issues dealing with "special" characters.
-            prepared[vocab_id] = term_ids
-            # For faceted "_exact" in URL.
-            prepared["{0}_exact".format(vocab_id)] = term_ids
-        return prepared
-
-    def prepare_repository(self, obj):  # pylint: disable=no-self-use
-        """Use the slug for the repo, since it's unique."""
-        data = get_course_metadata(obj.course_id)
-        return data["repo_slug"]
diff --git a/search/signals.py b/search/signals.py
@@ -10,41 +10,22 @@
 
 from django.db.models.signals import m2m_changed, post_save, post_delete
 from django.dispatch import receiver
-from haystack.signals import RealtimeSignalProcessor
 from statsd.defaults.django import statsd
 
 log = logging.getLogger(__name__)
 
 
-class LoreRealTimeSignalProcessor(RealtimeSignalProcessor):
-    """
-    Add timers for Haystack signal processing.
-    """
-    @statsd.timer('lore.haystack.save_signal')
-    def handle_save(self, sender, instance, **kwargs):
-        super(LoreRealTimeSignalProcessor, self).handle_save(
-            sender, instance, **kwargs
-        )
-
-    @statsd.timer('lore.haystack.delete_signal')
-    def handle_delete(self, sender, instance, **kwargs):
-        super(LoreRealTimeSignalProcessor, self).handle_delete(
-            sender, instance, **kwargs
-        )
-
-
 # pylint: disable=unused-argument
 @statsd.timer('lore.haystack.taxonomy_update')
 @receiver(m2m_changed)
 def handle_m2m_save(sender, **kwargs):
     """Update index when taxonomies are updated."""
-    from search.search_indexes import LearningResourceIndex, get_vocabs
+    from search.search_indexes import get_vocabs
     instance = kwargs.pop("instance")
     if instance.__class__.__name__ != "LearningResource":
         return
     # Update cache for the LearningResource if it's already set.
     get_vocabs(instance.id)
-    LearningResourceIndex().update_object(instance)
     # Update Elasticsearch index:
     from search.utils import index_resources
     index_resources([instance])

diff --git a/search/tests/test_indexing.py b/search/tests/test_indexing.py
@@ -205,11 +205,11 @@ def get_count():
             return count
 
         set_cache_timeout(0)
-        with self.assertNumQueries(31):
+        with self.assertNumQueries(16):
             self.assertEqual(get_count(), 0)
 
         set_cache_timeout(60)
-        with self.assertNumQueries(17):
+        with self.assertNumQueries(14):
             self.assertEqual(get_count(), 1)
 
     def test_course_cache(self):