Skip to content
This repository has been archived by the owner on Jan 28, 2020. It is now read-only.

Commit

Permalink
Stripped caching out of vocabularies during indexing.
Browse files Browse the repository at this point in the history
Fixes #556.
  • Loading branch information
ShawnMilo committed Aug 21, 2015
1 parent c5c73e2 commit 72a60bc
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 75 deletions.
41 changes: 7 additions & 34 deletions search/search_indexes.py
Expand Up @@ -50,47 +50,20 @@ def get_course_metadata(course_id):
return data


def get_vocabs(course_id, resource_id, solo_update=False):
def get_vocabs(resource_id):
"""
Caches and returns taxonomy metadata for a course.
Returns taxonomy metadata for a course.
Args:
course_id (int): Primary key of Course
resource_id (int): Primary key of LearningResource
Returns:
data (dict): Vocab/term data for course.
"""
key = "vocab_cache_{0}".format(resource_id)
cached = cache.get(key)
if (solo_update is False) and (cached is not None):
return cached

# Pre-populate the cache with blank values in case there are no
# terms for that LearningResource. Otherwise, looking up the vocabularies
# for that resource will refill the cache for the entire course. If there
# is already a value, retain it.
resource_ids = LearningResource.objects.filter(
course__id=course_id).values_list('id', flat=True)
for resource_id in resource_ids:
rkey = "vocab_cache_{0}".format(resource_id)
cache.set(rkey, cache.get(rkey, {}))
value = {}

term_cache = defaultdict(lambda: defaultdict(list))
data = defaultdict(list)
rels = LearningResource.terms.related.through.objects.select_related(
"term").filter(learningresource__course__id=course_id)
if solo_update is True:
rels = rels.filter(learningresource_id=resource_id)
"term").filter(learningresource__id=resource_id)
for rel in rels.iterator():
term_cache[rel.learningresource_id][
rel.term.vocabulary_id].append(rel.term_id)
for lid, data in term_cache.items():
lkey = "vocab_cache_{0}".format(lid)
if lkey == key:
value = dict(data)
cache.set(lkey, dict(data))
# Return `value` directly, not from the cache. Otherwise, if caching
# is disabled (TIMEOUT == 0), this will always return nothing.
return value
data[rel.term.vocabulary_id].append(rel.term_id)
return dict(data)


class LearningResourceIndex(indexes.SearchIndex, indexes.Indexable):
Expand Down Expand Up @@ -182,7 +155,7 @@ def prepare(self, obj):
as well, but don't because explicit is better than implicit.
"""
prepared = super(LearningResourceIndex, self).prepare(obj)
for vocab_id, term_ids in get_vocabs(obj.course_id, obj.id).items():
for vocab_id, term_ids in get_vocabs(obj.id).items():
# Use the integer primary keys as index values. This saves space,
# and also avoids all issues dealing with "special" characters.
prepared[vocab_id] = term_ids
Expand Down
2 changes: 1 addition & 1 deletion search/signals.py
Expand Up @@ -44,5 +44,5 @@ def handle_m2m_save(sender, **kwargs):
if instance.__class__.__name__ != "LearningResource":
return
# Update cache for the LearningResource if it's already set.
get_vocabs(instance.course_id, instance.id, solo_update=True)
get_vocabs(instance.id)
LearningResourceIndex().update_object(instance)
45 changes: 5 additions & 40 deletions search/tests/test_indexing.py
Expand Up @@ -205,11 +205,11 @@ def get_count():
return count

set_cache_timeout(0)
with self.assertNumQueries(23):
with self.assertNumQueries(20):
self.assertEqual(get_count(), 0)

set_cache_timeout(60)
with self.assertNumQueries(8):
with self.assertNumQueries(9):
self.assertEqual(get_count(), 1)

def test_course_cache(self):
Expand All @@ -234,14 +234,14 @@ def thrice(self):
def three_times():
"""Get vocab data three times."""
for _ in range(0, 3):
get_vocabs(self.course.id, self.resource.id)
get_vocabs(self.resource.id)

set_cache_timeout(0)
with self.assertNumQueries(6):
with self.assertNumQueries(3):
three_times()

set_cache_timeout(60)
with self.assertNumQueries(2):
with self.assertNumQueries(3):
three_times()

def test_term_cache_with_data(self):
Expand All @@ -262,38 +262,3 @@ def test_term_cache_without_data(self):
a LearningResource isn't tagged with any terms.
"""
self.thrice()

def test_vocab_cache_by_course(self):
"""
Ensure that get_vocabs for one course doesn't
pollute the cache for another course.
"""
# Create a resource in a separate course.
self.course, course2 = copy_instance(self.course)
self.resource, resource2 = copy_instance(self.resource)
course2.run = "{0} part 2".format(self.course.run)
course2.save()
resource2.course = course2
resource2.save()

key1 = "vocab_cache_{0}".format(self.resource.id)
key2 = "vocab_cache_{0}".format(resource2.id)

# Resources were recently saved, so the keys should be in the cache.
self.assertIn(key1, cache)
self.assertIn(key2, cache)

# Getting vocabs for a resource should not refresh the cache for
# resources in another course.
cache.clear()
self.resource.save()
self.assertIn(key1, cache)
self.assertNotIn(key2, cache)

# Resource for the same course are updated.
resource2.course = self.resource.course
resource2.save()
cache.clear()
self.resource.save()
self.assertIn(key1, cache)
self.assertIn(key2, cache)

0 comments on commit 72a60bc

Please sign in to comment.