Skip to content
This repository has been archived by the owner on Jan 28, 2020. It is now read-only.

Commit

Permalink
Removed haystack
Browse files Browse the repository at this point in the history
  • Loading branch information
George Schneeloch committed Oct 8, 2015
1 parent c6c889e commit 006a743
Show file tree
Hide file tree
Showing 8 changed files with 69 additions and 187 deletions.
10 changes: 2 additions & 8 deletions learningresources/tests/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,9 @@
from django.conf import settings
from django.contrib.auth.models import User, Permission
from django.core.files.storage import default_storage
from django.core.management import call_command
from django.test import Client
from django.test.testcases import TestCase
from django.test.utils import override_settings
import haystack

from learningresources.api import (
create_repo,
Expand All @@ -28,7 +26,7 @@
update_description_path
)
from learningresources.models import Repository, StaticAsset
from search.utils import recreate_index, refresh_index
from search.utils import recreate_index, refresh_index, remove_index

log = logging.getLogger(__name__)
# Using the md5 hasher speeds up tests.
Expand Down Expand Up @@ -131,11 +129,7 @@ def tearDown(self):
"""Clean up Elasticsearch and static assets between tests."""
for static_asset in StaticAsset.objects.all():
default_storage.delete(static_asset.asset)
for key, _ in haystack.connections.connections_info.items():
haystack.connections.reload(key)
call_command('clear_index', interactive=False, verbosity=0)
recreate_index()
refresh_index()
remove_index()

def _make_archive(self, path, make_zip=False, ext=None):
"""
Expand Down
10 changes: 4 additions & 6 deletions lore/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@ def get_var(name, default):
'taxonomy',
'rest',
'rest_framework',
'haystack',
'search',
'roles',
'xanalytics',
Expand Down Expand Up @@ -385,15 +384,10 @@ def get_var(name, default):
# Haystack
HAYSTACK_CONNECTIONS = {
'default': {
'ENGINE': (
'haystack.backends.elasticsearch_backend'
'.ElasticsearchSearchEngine'
),
'URL': get_var('HAYSTACK_URL', 'http://127.0.0.1:9200'),
'INDEX_NAME': get_var('HAYSTACK_INDEX', 'haystack'),
}
}
HAYSTACK_SIGNAL_PROCESSOR = 'search.signals.LoreRealTimeSignalProcessor'

XANALYTICS_URL = get_var('XANALYTICS_URL', "")

Expand All @@ -414,3 +408,7 @@ def get_var(name, default):

# Google analytics code
GOOGLE_ANALYTICS_ID = get_var('LORE_GOOGLE_ANALYTICS_ID', None)

# Imported only for its side effect: loading search.signals runs its
# @receiver decorators, which connects the signal handlers.
# pylint: disable=unused-import
import search.signals # noqa
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ django-bootstrap3==6.2.2
django-compressor==1.5
django-debug-toolbar==1.3.2
django-elasticsearch-debug-toolbar==0.1.15
django-haystack==2.4.0
djangorestframework==3.2.2
dj-database-url==0.3.0
dj-static==0.0.6
Expand Down
14 changes: 0 additions & 14 deletions search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@

from __future__ import unicode_literals

from haystack.query import SearchQuerySet

from search.sorting import LoreSortingFields
from search.utils import search_index
from taxonomy.models import Vocabulary, Term
Expand All @@ -29,28 +27,16 @@ def construct_queryset(repo_slug, query='', selected_facets=None, sortby=''):
if selected_facets is None:
selected_facets = []

queryset = SearchQuerySet()

kwargs = {}
for facet in selected_facets:
queryset = queryset.narrow(facet)

if query != "":
kwargs["content"] = query

queryset = queryset.filter(**kwargs)

queryset = queryset.narrow("repository_exact:{slug}".format(
slug=repo_slug))

if sortby == "":
sortby = LoreSortingFields.DEFAULT_SORTING_FIELD
# default values in case of weird sorting options
sortby, _, order_direction = LoreSortingFields.get_sorting_option(
sortby)
sortby = "{0}{1}".format(order_direction, sortby)
queryset = queryset.order_by(
sortby, LoreSortingFields.BASE_SORTING_FIELD)

# Do a parallel query using elasticsearch-dsl.
if query not in ("", None):
Expand Down
121 changes: 1 addition & 120 deletions search/search_indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,8 @@
import logging

from django.core.cache import caches
from haystack import indexes
from lxml import etree

from learningresources.models import Course, LearningResource, get_preview_url
from learningresources.models import Course, LearningResource

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -64,120 +62,3 @@ def get_vocabs(resource_id):
for rel in rels.iterator():
data[rel.term.vocabulary_id].append(rel.term_id)
return dict(data)


class LearningResourceIndex(indexes.SearchIndex, indexes.Indexable):
    """
    Index configuration for the LearningResource model.

    Fields declared with ``model_attr`` are copied from the model
    attribute of that name; the remaining fields are filled in by the
    matching ``prepare_<field>`` method below.
    """
    # Primary document field; built by prepare_text().
    text = indexes.CharField(document=True)
    resource_type = indexes.CharField(faceted=True)
    course = indexes.CharField(faceted=True)
    run = indexes.CharField(faceted=True)

    # repository is here for filtering the repository listing page by the
    # repo_slug in the URL. It is not used or needed in the repository listing
    # page because that page is always for a single repository.
    repository = indexes.CharField(faceted=True)

    # Analytics values copied from the model's xa_* attributes.
    nr_views = indexes.IntegerField(model_attr="xa_nr_views")
    nr_attempts = indexes.IntegerField(model_attr="xa_nr_attempts")
    avg_grade = indexes.FloatField(model_attr="xa_avg_grade")

    # Fields declared with indexed=False.
    lid = indexes.IntegerField(model_attr="id", indexed=False)
    title = indexes.CharField(model_attr="title", indexed=False)
    titlesort = indexes.CharField(indexed=False)
    description = indexes.CharField(model_attr="description", indexed=False)
    description_path = indexes.CharField(
        model_attr="description_path",
        indexed=False,
    )
    preview_url = indexes.CharField(indexed=False)

    def get_model(self):
        """Return the model for which this configures indexing."""
        return LearningResource

    def index_queryset(self, using=None):
        """Records to check when updating entire index."""
        return self.get_model().objects.all()

    def prepare_text(self, obj):  # pylint: disable=no-self-use
        """
        Indexing of the primary content of a LearningResource.

        Returns the title, description and tag-stripped XML content of
        ``obj`` joined by single spaces. If the XML cannot be parsed, the
        raw ``content_xml`` value is used instead.
        """
        try:
            # Strip XML tags from content before indexing.
            tree = etree.fromstring(obj.content_xml)
            content = etree.tostring(tree, encoding="utf-8", method="text")
        except (etree.XMLSyntaxError, ValueError):
            # XMLSyntaxError: blank/invalid XML.
            # ValueError: lxml refuses unicode strings that carry an XML
            # encoding declaration. Fall back to the raw content rather
            # than letting one resource abort indexing.
            content = obj.content_xml
        try:
            content = content.decode('utf-8')
        except AttributeError:
            # For Python 3.
            pass

        return "{0} {1} {2}".format(
            obj.title, obj.description, content
        )

    def prepare_run(self, obj):  # pylint: disable=no-self-use
        """Define what goes into the "run" index."""
        data = get_course_metadata(obj.course_id)
        return data["run"]

    def prepare_preview_url(self, obj):  # pylint: disable=no-self-use
        """Define what goes into the "preview_url" index."""
        data = get_course_metadata(obj.course_id)
        return get_preview_url(
            obj,
            org=data["org"],
            course_number=data["course_number"],
            run=data["run"],
        )

    def prepare_titlesort(self, obj):  # pylint: disable=no-self-use
        """
        Define what goes into the "titlesort" index.

        Non-empty titles are lowercased and prefixed with '0'; empty
        titles become '1'.
        """
        title = obj.title.strip()
        # hack to handle empty titles
        # to show up at the bottom of the sorted list
        if title:
            title = '0{0}'.format(title)
            return title.lower()
        return '1'

    @staticmethod
    def prepare_course(obj):
        """Define what goes into the "course" index."""
        data = get_course_metadata(obj.course_id)
        return data["course_number"]

    @staticmethod
    def prepare_resource_type(obj):
        """The name of the LearningResourceType."""
        return obj.learning_resource_type.name

    def prepare(self, obj):
        """
        Get dynamic vocabularies.

        The prepare() method runs last, similar to Django's form.clean().
        This allows us to override anything we want. Here we add vocabularies
        to the index because they must be dynamic.

        Technically, we could handle the other stuff (run, course, etc.) here
        as well, but don't because explicit is better than implicit.
        """
        prepared = super(LearningResourceIndex, self).prepare(obj)
        for vocab_id, term_ids in get_vocabs(obj.id).items():
            # Use the integer primary keys as index values. This saves space,
            # and also avoids all issues dealing with "special" characters.
            prepared[vocab_id] = term_ids
            # For faceted "_exact" in URL.
            prepared["{0}_exact".format(vocab_id)] = term_ids
        return prepared

    def prepare_repository(self, obj):  # pylint: disable=no-self-use
        """Use the slug for the repo, since it's unique."""
        data = get_course_metadata(obj.course_id)
        return data["repo_slug"]
21 changes: 1 addition & 20 deletions search/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,41 +10,22 @@

from django.db.models.signals import m2m_changed, post_save, post_delete
from django.dispatch import receiver
from haystack.signals import RealtimeSignalProcessor
from statsd.defaults.django import statsd

log = logging.getLogger(__name__)


class LoreRealTimeSignalProcessor(RealtimeSignalProcessor):
    """
    Realtime Haystack signal processor whose save and delete handlers
    are wrapped in statsd timers.
    """

    @statsd.timer('lore.haystack.save_signal')
    def handle_save(self, sender, instance, **kwargs):
        """Delegate to the parent save handler under a statsd timer."""
        parent = super(LoreRealTimeSignalProcessor, self)
        parent.handle_save(sender, instance, **kwargs)

    @statsd.timer('lore.haystack.delete_signal')
    def handle_delete(self, sender, instance, **kwargs):
        """Delegate to the parent delete handler under a statsd timer."""
        parent = super(LoreRealTimeSignalProcessor, self)
        parent.handle_delete(sender, instance, **kwargs)


# pylint: disable=unused-argument
@statsd.timer('lore.haystack.taxonomy_update')
@receiver(m2m_changed)
def handle_m2m_save(sender, **kwargs):
"""Update index when taxonomies are updated."""
from search.search_indexes import LearningResourceIndex, get_vocabs
from search.search_indexes import get_vocabs
instance = kwargs.pop("instance")
if instance.__class__.__name__ != "LearningResource":
return
# Update cache for the LearningResource if it's already set.
get_vocabs(instance.id)
LearningResourceIndex().update_object(instance)
# Update Elasticsearch index:
from search.utils import index_resources
index_resources([instance])
Expand Down
4 changes: 2 additions & 2 deletions search/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,11 +205,11 @@ def get_count():
return count

set_cache_timeout(0)
with self.assertNumQueries(31):
with self.assertNumQueries(16):
self.assertEqual(get_count(), 0)

set_cache_timeout(60)
with self.assertNumQueries(17):
with self.assertNumQueries(14):
self.assertEqual(get_count(), 1)

def test_course_cache(self):
Expand Down
Loading

0 comments on commit 006a743

Please sign in to comment.