diff --git a/lore/settings.py b/lore/settings.py index ca7ccf95..7e81501f 100644 --- a/lore/settings.py +++ b/lore/settings.py @@ -353,5 +353,15 @@ def get_var(name, default): } } HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor' -ALLOW_CACHING = get_var("ALLOW_CACHING", get_var("ALLOW_CACHING", False)) + XANALYTICS_URL = get_var('XANALYTICS_URL', "") + +CACHES = { + "default": { + "BACKEND": "django.core.cache.backends.locmem.LocMemCache", + }, + "lore_indexing": { + "BACKEND": "django.core.cache.backends.locmem.LocMemCache", + "TIMEOUT": get_var("LORE_INDEXING_CACHE_TIMEOUT", "60"), + } +} diff --git a/search/search_indexes.py b/search/search_indexes.py index b3b93268..c88a38f5 100644 --- a/search/search_indexes.py +++ b/search/search_indexes.py @@ -15,41 +15,17 @@ from __future__ import unicode_literals from collections import defaultdict -from datetime import datetime, timedelta import logging -from django.conf import settings +from django.core.cache import caches from haystack import indexes from lxml import etree from learningresources.models import Course, LearningResource -INDEX_CACHE = { - "course": {}, - "term": {}, - "born": datetime.now(), -} -MAX_INDEX_AGE = timedelta(minutes=1) - log = logging.getLogger(__name__) - -def check_cache_age(): - """ - Keep the cache from getting stale. - - Must not cache during tests or development, or it will - cause confusion. Must manually enable caching in production. - """ - # pylint: disable=global-statement - global INDEX_CACHE - if (datetime.now() - INDEX_CACHE["born"] > MAX_INDEX_AGE) \ - or settings.ALLOW_CACHING is False: - INDEX_CACHE = { - "course": {}, - "term": {}, - "born": datetime.now(), - } +cache = caches["lore_indexing"] def get_course_metadata(course_id): @@ -60,36 +36,60 @@ def get_course_metadata(course_id): Returns: data (dict): Metadata about course. """ - check_cache_age() - data = INDEX_CACHE["course"].get(course_id, {}) + key = "course_metadata_{0}".format(course_id) + data = cache.get(key, {}) if data == {}: - course = Course.objects.get(id=course_id) + course = Course.objects.select_related("repository").get(id=course_id) data["run"] = course.run data["course_number"] = course.course_number data["org"] = course.org data["repo_slug"] = course.repository.slug - INDEX_CACHE["course"][course_id] = data + cache.set(key, data) + # Return `data` directly, not from the cache. Otherwise, if caching + # is disabled (TIMEOUT == 0), this will always return nothing. return data -def get_vocabs(course_id, lid): +def get_vocabs(course_id, resource_id, solo_update=False): """ Caches and returns taxonomy metadata for a course. Args: - course_id (int): Primary key of learningresources.models.Course - lid (int): Primary key of learningresources.models.LearningResource + course_id (int): Primary key of Course + resource_id (int): Primary key of LearningResource Returns: data (dict): Vocab/term data for course. """ - check_cache_age() - if INDEX_CACHE["term"].get(course_id) is None: - INDEX_CACHE["term"][course_id] = defaultdict(lambda: defaultdict(list)) - rels = LearningResource.terms.related.through.objects.select_related( - "term").filter(learningresource__course__id=course_id) - for rel in rels.iterator(): - INDEX_CACHE["term"][course_id][rel.learningresource_id][ - rel.term.vocabulary_id].append(rel.term_id) - return INDEX_CACHE["term"][course_id][lid] + key = "vocab_cache_{0}".format(resource_id) + cached = cache.get(key) + if (solo_update is False) and (cached is not None): + return cached + + # Pre-populate the cache with blank values in case there are no + # terms for that LearningResource. Otherwise, looking up the vocabularies + # for that resource will refill the cache for the entire course. If there + # is already a value, retain it. + resource_ids = LearningResource.objects.all().values_list('id', flat=True) + for resource_id in resource_ids: + rkey = "vocab_cache_{0}".format(resource_id) + cache.set(rkey, cache.get(rkey, {})) + value = {} + + term_cache = defaultdict(lambda: defaultdict(list)) + rels = LearningResource.terms.related.through.objects.select_related( + "term").filter(learningresource__course__id=course_id) + if solo_update is True: + rels = rels.filter(learningresource_id=resource_id) + for rel in rels.iterator(): + term_cache[rel.learningresource_id][ + rel.term.vocabulary_id].append(rel.term_id) + for lid, data in term_cache.items(): + lkey = "vocab_cache_{0}".format(lid) + if lkey == key: + value = dict(data) + cache.set(lkey, dict(data)) + # Return `value` directly, not from the cache. Otherwise, if caching + # is disabled (TIMEOUT == 0), this will always return nothing. + return value class LearningResourceIndex(indexes.SearchIndex, indexes.Indexable): @@ -181,7 +181,6 @@ def prepare(self, obj): as well, but don't because explicit is better than implicit. """ prepared = super(LearningResourceIndex, self).prepare(obj) - for vocab_id, term_ids in get_vocabs(obj.course_id, obj.id).items(): # Use the integer primary keys as index values. This saves space, # and also avoids all issues dealing with "special" characters. diff --git a/search/signals.py b/search/signals.py index 6cd99b84..319a14b7 100644 --- a/search/signals.py +++ b/search/signals.py @@ -11,7 +11,7 @@ from django.db.models.signals import m2m_changed from django.dispatch import receiver -from search.search_indexes import LearningResourceIndex +from search.search_indexes import LearningResourceIndex, get_vocabs log = logging.getLogger(__name__) @@ -23,4 +23,6 @@ def handle_m2m_save(sender, **kwargs): instance = kwargs.pop("instance", None) if instance.__class__.__name__ != "LearningResource": return + # Update cache for the LearningResource if it's already set. + get_vocabs(instance.course_id, instance.id, solo_update=True) LearningResourceIndex().update_object(instance) diff --git a/search/tests/test_indexing.py b/search/tests/test_indexing.py index 039fe313..758b1adc 100644 --- a/search/tests/test_indexing.py +++ b/search/tests/test_indexing.py @@ -1,15 +1,34 @@ """Tests for search engine indexing.""" from __future__ import unicode_literals -from django.conf import settings +import logging +from search.search_indexes import get_course_metadata, get_vocabs, cache from search.sorting import LoreSortingFields from search.tests.base import SearchTestCase +log = logging.getLogger(__name__) + + +def set_cache_timeout(seconds): + """Override the cache timeout for testing.""" + cache.default_timeout = seconds + cache.clear() + class TestIndexing(SearchTestCase): """Test Elasticsearch indexing.""" + def setUp(self): + """Remember old caching settings.""" + super(TestIndexing, self).setUp() + self.original_timeout = cache.default_timeout + + def tearDown(self): + """Restore old caching settings.""" + super(TestIndexing, self).tearDown() + cache.default_timeout = self.original_timeout + def test_index_on_save(self): """Index a LearningObject upon creation.""" search_text = "The quick brown fox." @@ -23,15 +42,16 @@ def test_index_vocabulary(self): Test that LearningResource indexes are updated when a a term is added or removed. """ + set_cache_timeout(0) term = self.terms[0] - self.assertTrue(self.count_faceted_results( - self.vocabulary.id, term.id) == 0) + self.assertEqual(self.count_faceted_results( + self.vocabulary.id, term.id), 0) self.resource.terms.add(term) - self.assertTrue(self.count_faceted_results( - self.vocabulary.id, term.id) == 1) + self.assertEqual(self.count_faceted_results( + self.vocabulary.id, term.id), 1) self.resource.terms.remove(term) - self.assertTrue(self.count_faceted_results( - self.vocabulary.id, term.id) == 0) + self.assertEqual(self.count_faceted_results( + self.vocabulary.id, term.id), 0) def test_strip_xml(self): """Indexed content_xml should have XML stripped.""" @@ -143,12 +163,61 @@ def get_count(): self.resource.save() return count - orig_cache_setting = settings.ALLOW_CACHING + set_cache_timeout(0) + with self.assertNumQueries(23): + self.assertEqual(get_count(), 0) + + set_cache_timeout(60) + with self.assertNumQueries(8): + self.assertEqual(get_count(), 1) - settings.ALLOW_CACHING = False - self.assertEqual(get_count(), 0) + def test_course_cache(self): + """ + Test caching -- enabled and disabled -- for course metadata. + """ + def three_times(): + """Get course metadata three times.""" + for _ in range(0, 3): + get_course_metadata(self.course.id) + + set_cache_timeout(0) + with self.assertNumQueries(3): + three_times() + + set_cache_timeout(60) + with self.assertNumQueries(1): + three_times() + + def thrice(self): + """Hit vocabs three times with and without caching.""" + def three_times(): + """Get vocab data three times.""" + for _ in range(0, 3): + get_vocabs(self.course.id, self.resource.id) + + set_cache_timeout(0) + with self.assertNumQueries(6): + three_times() + + set_cache_timeout(60) + with self.assertNumQueries(2): + three_times() + + def test_term_cache_with_data(self): + """ + Test caching -- enabled and disabled -- for vocabularies. + This should work if there are vocabulary terms, because + there is something to cache. + """ + self.resource.terms.add(self.terms[0]) + self.thrice() - settings.ALLOW_CACHING = True - self.assertEqual(get_count(), 1) + def test_term_cache_without_data(self): + """ + Test caching -- enabled and disabled -- for vocabularies. - settings.ALLOW_CACHING = orig_cache_setting + This should work if there are not vocabulary terms. + Otherwise, it'll reload the cache for an entire course whenever + a LearningResource isn't tagged with any terms. + """ + self.thrice()