Stripped caching out of vocabularies during indexing.

Fixes #556.
mitodl · Aug 21, 2015 · 72a60bc · 72a60bc
1 parent c5c73e2
commit 72a60bc
Show file tree

Hide file tree

Showing 3 changed files with 13 additions and 75 deletions.
diff --git a/search/search_indexes.py b/search/search_indexes.py
@@ -50,47 +50,20 @@ def get_course_metadata(course_id):
     return data
 
 
-def get_vocabs(course_id, resource_id, solo_update=False):
+def get_vocabs(resource_id):
     """
-    Caches and returns taxonomy metadata for a course.
+    Returns taxonomy metadata for a course.
     Args:
-        course_id (int): Primary key of Course
         resource_id (int): Primary key of LearningResource
     Returns:
         data (dict): Vocab/term data for course.
     """
-    key = "vocab_cache_{0}".format(resource_id)
-    cached = cache.get(key)
-    if (solo_update is False) and (cached is not None):
-        return cached
-
-    # Pre-populate the cache with blank values in case there are no
-    # terms for that LearningResource. Otherwise, looking up the vocabularies
-    # for that resource will refill the cache for the entire course. If there
-    # is already a value, retain it.
-    resource_ids = LearningResource.objects.filter(
-        course__id=course_id).values_list('id', flat=True)
-    for resource_id in resource_ids:
-        rkey = "vocab_cache_{0}".format(resource_id)
-        cache.set(rkey, cache.get(rkey, {}))
-    value = {}
-
-    term_cache = defaultdict(lambda: defaultdict(list))
+    data = defaultdict(list)
     rels = LearningResource.terms.related.through.objects.select_related(
-        "term").filter(learningresource__course__id=course_id)
-    if solo_update is True:
-        rels = rels.filter(learningresource_id=resource_id)
+        "term").filter(learningresource__id=resource_id)
     for rel in rels.iterator():
-        term_cache[rel.learningresource_id][
-            rel.term.vocabulary_id].append(rel.term_id)
-    for lid, data in term_cache.items():
-        lkey = "vocab_cache_{0}".format(lid)
-        if lkey == key:
-            value = dict(data)
-        cache.set(lkey, dict(data))
-    # Return `value` directly, not from the cache. Otherwise, if caching
-    # is disabled (TIMEOUT == 0), this will always return nothing.
-    return value
+        data[rel.term.vocabulary_id].append(rel.term_id)
+    return dict(data)
 
 
 class LearningResourceIndex(indexes.SearchIndex, indexes.Indexable):
@@ -182,7 +155,7 @@ def prepare(self, obj):
         as well, but don't because explicit is better than implicit.
         """
         prepared = super(LearningResourceIndex, self).prepare(obj)
-        for vocab_id, term_ids in get_vocabs(obj.course_id, obj.id).items():
+        for vocab_id, term_ids in get_vocabs(obj.id).items():
             # Use the integer primary keys as index values. This saves space,
             # and also avoids all issues dealing with "special" characters.
             prepared[vocab_id] = term_ids

diff --git a/search/signals.py b/search/signals.py
@@ -44,5 +44,5 @@ def handle_m2m_save(sender, **kwargs):
     if instance.__class__.__name__ != "LearningResource":
         return
     # Update cache for the LearningResource if it's already set.
-    get_vocabs(instance.course_id, instance.id, solo_update=True)
+    get_vocabs(instance.id)
     LearningResourceIndex().update_object(instance)
diff --git a/search/tests/test_indexing.py b/search/tests/test_indexing.py
@@ -205,11 +205,11 @@ def get_count():
             return count
 
         set_cache_timeout(0)
-        with self.assertNumQueries(23):
+        with self.assertNumQueries(20):
             self.assertEqual(get_count(), 0)
 
         set_cache_timeout(60)
-        with self.assertNumQueries(8):
+        with self.assertNumQueries(9):
             self.assertEqual(get_count(), 1)
 
     def test_course_cache(self):
@@ -234,14 +234,14 @@ def thrice(self):
         def three_times():
             """Get vocab data three times."""
             for _ in range(0, 3):
-                get_vocabs(self.course.id, self.resource.id)
+                get_vocabs(self.resource.id)
 
         set_cache_timeout(0)
-        with self.assertNumQueries(6):
+        with self.assertNumQueries(3):
             three_times()
 
         set_cache_timeout(60)
-        with self.assertNumQueries(2):
+        with self.assertNumQueries(3):
             three_times()
 
     def test_term_cache_with_data(self):
@@ -262,38 +262,3 @@ def test_term_cache_without_data(self):
         a LearningResource isn't tagged with any terms.
         """
         self.thrice()
-
-    def test_vocab_cache_by_course(self):
-        """
-        Ensure that get_vocabs for one course doesn't
-        pollute the cache for another course.
-        """
-        # Create a resource in a separate course.
-        self.course, course2 = copy_instance(self.course)
-        self.resource, resource2 = copy_instance(self.resource)
-        course2.run = "{0} part 2".format(self.course.run)
-        course2.save()
-        resource2.course = course2
-        resource2.save()
-
-        key1 = "vocab_cache_{0}".format(self.resource.id)
-        key2 = "vocab_cache_{0}".format(resource2.id)
-
-        # Resources were recently saved, so the keys should be in the cache.
-        self.assertIn(key1, cache)
-        self.assertIn(key2, cache)
-
-        # Getting vocabs for a resource should not refresh the cache for
-        # resources in another course.
-        cache.clear()
-        self.resource.save()
-        self.assertIn(key1, cache)
-        self.assertNotIn(key2, cache)
-
-        # Resource for the same course are updated.
-        resource2.course = self.resource.course
-        resource2.save()
-        cache.clear()
-        self.resource.save()
-        self.assertIn(key1, cache)
-        self.assertIn(key2, cache)