Skip to content
This repository has been archived by the owner on Jan 28, 2020. It is now read-only.

Commit

Permalink
Updated indexing caching from dict to Django's cache.
Browse files Browse the repository at this point in the history
Closes #467.

Improves upon PR #460 by getting rid of a "global" variable and
a custom settings.py value. Also adds tests which confirm that
fewer queries are done when the caching is used.
  • Loading branch information
ShawnMilo committed Aug 11, 2015
1 parent b8b6607 commit 461573e
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 58 deletions.
12 changes: 11 additions & 1 deletion lore/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,5 +353,15 @@ def get_var(name, default):
}
}
HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'
ALLOW_CACHING = get_var("ALLOW_CACHING", get_var("ALLOW_CACHING", False))

XANALYTICS_URL = get_var('XANALYTICS_URL', "")

CACHES = {
"default": {
"BACKEND": "django.core.cache.backends.locmem.LocMemCache",
},
"lore_indexing": {
"BACKEND": "django.core.cache.backends.locmem.LocMemCache",
"TIMEOUT": get_var("LORE_INDEXING_CACHE_TIMEOUT", "60"),
}
}
85 changes: 42 additions & 43 deletions search/search_indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,41 +15,17 @@
from __future__ import unicode_literals

from collections import defaultdict
from datetime import datetime, timedelta
import logging

from django.conf import settings
from django.core.cache import caches
from haystack import indexes
from lxml import etree

from learningresources.models import Course, LearningResource

INDEX_CACHE = {
"course": {},
"term": {},
"born": datetime.now(),
}
MAX_INDEX_AGE = timedelta(minutes=1)

log = logging.getLogger(__name__)


def check_cache_age():
"""
Keep the cache from getting stale.
Must not cache during tests or development, or it will
cause confusion. Must manually enable caching in production.
"""
# pylint: disable=global-statement
global INDEX_CACHE
if (datetime.now() - INDEX_CACHE["born"] > MAX_INDEX_AGE) \
or settings.ALLOW_CACHING is False:
INDEX_CACHE = {
"course": {},
"term": {},
"born": datetime.now(),
}
cache = caches["lore_indexing"]


def get_course_metadata(course_id):
Expand All @@ -60,36 +36,60 @@ def get_course_metadata(course_id):
Returns:
data (dict): Metadata about course.
"""
check_cache_age()
data = INDEX_CACHE["course"].get(course_id, {})
key = "course_metadata_{0}".format(course_id)
data = cache.get(key, {})
if data == {}:
course = Course.objects.get(id=course_id)
course = Course.objects.select_related("repository").get(id=course_id)
data["run"] = course.run
data["course_number"] = course.course_number
data["org"] = course.org
data["repo_slug"] = course.repository.slug
INDEX_CACHE["course"][course_id] = data
cache.set(key, data)
# Return `data` directly, not from the cache. Otherwise, if caching
# is disabled (TIMEOUT == 0), this will always return nothing.
return data


def get_vocabs(course_id, lid):
def get_vocabs(course_id, resource_id, solo_update=False):
"""
Caches and returns taxonomy metadata for a course.
Args:
course_id (int): Primary key of learningresources.models.Course
lid (int): Primary key of learningresources.models.LearningResource
course_id (int): Primary key of Course
resource_id (int): Primary key of LearningResource
Returns:
data (dict): Vocab/term data for course.
"""
check_cache_age()
if INDEX_CACHE["term"].get(course_id) is None:
INDEX_CACHE["term"][course_id] = defaultdict(lambda: defaultdict(list))
rels = LearningResource.terms.related.through.objects.select_related(
"term").filter(learningresource__course__id=course_id)
for rel in rels.iterator():
INDEX_CACHE["term"][course_id][rel.learningresource_id][
rel.term.vocabulary_id].append(rel.term_id)
return INDEX_CACHE["term"][course_id][lid]
key = "vocab_cache_{0}".format(resource_id)
cached = cache.get(key)
if (solo_update is False) and (cached is not None):
return cached

# Pre-populate the cache with blank values in case there are no
# terms for that LearningResource. Otherwise, looking up the vocabularies
# for that resource will refill the cache for the entire course. If there
# is already a value, retain it.
resource_ids = LearningResource.objects.all().values_list('id', flat=True)
for resource_id in resource_ids:
rkey = "vocab_cache_{0}".format(resource_id)
cache.set(rkey, cache.get(rkey, {}))
value = {}

term_cache = defaultdict(lambda: defaultdict(list))
rels = LearningResource.terms.related.through.objects.select_related(
"term").filter(learningresource__course__id=course_id)
if solo_update is True:
rels = rels.filter(learningresource_id=resource_id)
for rel in rels.iterator():
term_cache[rel.learningresource_id][
rel.term.vocabulary_id].append(rel.term_id)
for lid, data in term_cache.items():
lkey = "vocab_cache_{0}".format(lid)
if lkey == key:
value = dict(data)
cache.set(lkey, dict(data))
# Return `value` directly, not from the cache. Otherwise, if caching
# is disabled (TIMEOUT == 0), this will always return nothing.
return value


class LearningResourceIndex(indexes.SearchIndex, indexes.Indexable):
Expand Down Expand Up @@ -181,7 +181,6 @@ def prepare(self, obj):
as well, but don't because explicit is better than implicit.
"""
prepared = super(LearningResourceIndex, self).prepare(obj)

for vocab_id, term_ids in get_vocabs(obj.course_id, obj.id).items():
# Use the integer primary keys as index values. This saves space,
# and also avoids all issues dealing with "special" characters.
Expand Down
4 changes: 3 additions & 1 deletion search/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from django.db.models.signals import m2m_changed
from django.dispatch import receiver

from search.search_indexes import LearningResourceIndex
from search.search_indexes import LearningResourceIndex, get_vocabs

log = logging.getLogger(__name__)

Expand All @@ -23,4 +23,6 @@ def handle_m2m_save(sender, **kwargs):
instance = kwargs.pop("instance", None)
if instance.__class__.__name__ != "LearningResource":
return
# Update cache for the LearningResource if it's already set.
get_vocabs(instance.course_id, instance.id, solo_update=True)
LearningResourceIndex().update_object(instance)
95 changes: 82 additions & 13 deletions search/tests/test_indexing.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,34 @@
"""Tests for search engine indexing."""
from __future__ import unicode_literals

from django.conf import settings
import logging

from search.search_indexes import get_course_metadata, get_vocabs, cache
from search.sorting import LoreSortingFields
from search.tests.base import SearchTestCase

log = logging.getLogger(__name__)


def set_cache_timeout(seconds):
"""Override the cache timeout for testing."""
cache.default_timeout = seconds
cache.clear()


class TestIndexing(SearchTestCase):
"""Test Elasticsearch indexing."""

def setUp(self):
"""Remember old caching settings."""
super(TestIndexing, self).setUp()
self.original_timeout = cache.default_timeout

def tearDown(self):
"""Restore old caching settings."""
super(TestIndexing, self).tearDown()
cache.default_timeout = self.original_timeout

def test_index_on_save(self):
"""Index a LearningObject upon creation."""
search_text = "The quick brown fox."
Expand All @@ -23,15 +42,16 @@ def test_index_vocabulary(self):
Test that LearningResource indexes are updated when a
term is added or removed.
"""
set_cache_timeout(0)
term = self.terms[0]
self.assertTrue(self.count_faceted_results(
self.vocabulary.id, term.id) == 0)
self.assertEqual(self.count_faceted_results(
self.vocabulary.id, term.id), 0)
self.resource.terms.add(term)
self.assertTrue(self.count_faceted_results(
self.vocabulary.id, term.id) == 1)
self.assertEqual(self.count_faceted_results(
self.vocabulary.id, term.id), 1)
self.resource.terms.remove(term)
self.assertTrue(self.count_faceted_results(
self.vocabulary.id, term.id) == 0)
self.assertEqual(self.count_faceted_results(
self.vocabulary.id, term.id), 0)

def test_strip_xml(self):
"""Indexed content_xml should have XML stripped."""
Expand Down Expand Up @@ -143,12 +163,61 @@ def get_count():
self.resource.save()
return count

orig_cache_setting = settings.ALLOW_CACHING
set_cache_timeout(0)
with self.assertNumQueries(23):
self.assertEqual(get_count(), 0)

set_cache_timeout(60)
with self.assertNumQueries(8):
self.assertEqual(get_count(), 1)

settings.ALLOW_CACHING = False
self.assertEqual(get_count(), 0)
def test_course_cache(self):
"""
Test caching -- enabled and disabled -- for course metadata.
"""
def three_times():
"""Get course metadata three times."""
for _ in range(0, 3):
get_course_metadata(self.course.id)

set_cache_timeout(0)
with self.assertNumQueries(3):
three_times()

set_cache_timeout(60)
with self.assertNumQueries(1):
three_times()

def thrice(self):
"""Hit vocabs three times with and without caching."""
def three_times():
"""Get vocab data three times."""
for _ in range(0, 3):
get_vocabs(self.course.id, self.resource.id)

set_cache_timeout(0)
with self.assertNumQueries(6):
three_times()

set_cache_timeout(60)
with self.assertNumQueries(2):
three_times()

def test_term_cache_with_data(self):
"""
Test caching -- enabled and disabled -- for vocabularies.
This should work if there are vocabulary terms, because
there is something to cache.
"""
self.resource.terms.add(self.terms[0])
self.thrice()

settings.ALLOW_CACHING = True
self.assertEqual(get_count(), 1)
def test_term_cache_without_data(self):
"""
Test caching -- enabled and disabled -- for vocabularies.
settings.ALLOW_CACHING = orig_cache_setting
This should work if there are no vocabulary terms.
Otherwise, it'll reload the cache for an entire course whenever
a LearningResource isn't tagged with any terms.
"""
self.thrice()

0 comments on commit 461573e

Please sign in to comment.