Merge pull request #1798 from jamalex/backport_next_steps
Backport next steps
rtibbles committed Jul 5, 2017
2 parents 70f14fb + 2db3820 commit 2d8f469
Showing 3 changed files with 35 additions and 42 deletions.
61 changes: 25 additions & 36 deletions kolibri/content/api.py
@@ -59,15 +59,12 @@ def filter_recommendations_for(self, queryset, value):
"""
Recommend items that are similar to this piece of content.
"""
recc_node = queryset.get(pk=value)
descendants = recc_node.get_descendants(include_self=False).exclude(kind__in=['topic', ''])
siblings = recc_node.get_siblings(include_self=False).exclude(kind__in=['topic', ''])
data = descendants | siblings # concatenates different querysets
return data
return queryset.get(pk=value).get_siblings(
include_self=False).order_by("lft").exclude(kind=content_kinds.TOPIC)

     def filter_next_steps(self, queryset, value):
         """
-        Recommend uncompleted content, content that has user completed content as a prerequisite.
+        Recommend content that has user completed content as a prerequisite, or leftward sibling.

         :param queryset: all content nodes for this channel
         :param value: id of currently logged in user, or none if user is anonymous
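
The rewritten recommendations_for query leans on MPTT's nested-set numbering: siblings share a parent, and ordering by `lft` returns them in tree order. A minimal plain-Python sketch of the same selection, using a hypothetical in-memory node list rather than the real ContentNode queryset:

    # Hypothetical stand-in rows: (pk, parent, lft, kind); not Kolibri models.
    nodes = [
        (1, None, 1, "topic"),     # root
        (2, 1, 2, "video"),
        (3, 1, 4, "topic"),        # topics are excluded from recommendations
        (4, 1, 6, "exercise"),
    ]

    def recommendations_for(pk):
        parent = next(n[1] for n in nodes if n[0] == pk)
        siblings = [n for n in nodes
                    if n[1] == parent and n[0] != pk and n[3] != "topic"]
        return sorted(siblings, key=lambda n: n[2])  # tree order via lft

    print(recommendations_for(2))  # -> [(4, 1, 6, 'exercise')]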
@@ -78,31 +75,19 @@ def filter_next_steps(self, queryset, value):
         if not value:
             return queryset.none()

-        tables = [
-            '"{summarylog_table}" AS "complete_log"',
-            '"{summarylog_table}" AS "incomplete_log"',
-            '"{content_table}" AS "complete_node"',
-            '"{content_table}" AS "incomplete_node"',
-        ]
-        table_names = {
-            "summarylog_table": ContentSummaryLog._meta.db_table,
-            "content_table": models.ContentNode._meta.db_table,
-        }
-        # aliases for sql table names
-        sql_tables_and_aliases = [table.format(**table_names) for table in tables]
-        # where conditions joined by ANDs
-        where_statements = ["NOT (incomplete_log.progress < 1 AND incomplete_log.content_id = incomplete_node.content_id)",
-                            "complete_log.user_id = '{user_id}'".format(user_id=value),
-                            "incomplete_log.user_id = '{user_id}'".format(user_id=value),
-                            "complete_log.progress = 1",
-                            "complete_node.rght = incomplete_node.lft - 1",
-                            "complete_log.content_id = complete_node.content_id"]
-        # custom SQL query to get uncompleted content based on mptt algorithm
-        next_steps_recommendations = "SELECT incomplete_node.* FROM {tables} WHERE {where}".format(
-            tables=", ".join(sql_tables_and_aliases),
-            where=_join_with_logical_operator(where_statements, "AND")
-        )
-        return models.ContentNode.objects.raw(next_steps_recommendations)
+        completed_content_ids = ContentSummaryLog.objects.filter(
+            user=value, progress=1).values_list('content_id', flat=True)
+
+        # If no logs, don't bother doing the other queries
+        if not completed_content_ids:
+            return queryset.none()
+
+        completed_content_nodes = queryset.filter(content_id__in=completed_content_ids).order_by()
+
+        return queryset.exclude(pk__in=completed_content_nodes).filter(
+            Q(has_prerequisite__in=completed_content_nodes) |
+            Q(lft__in=[rght + 1 for rght in completed_content_nodes.values_list('rght', flat=True)])
+        ).order_by()

     def filter_popular(self, queryset, value):
         """
@@ -114,8 +99,10 @@ def filter_popular(self, queryset, value):
"""
if ContentSessionLog.objects.count() < 50:
# return 25 random content nodes if not enough session logs
pks = queryset.values_list('pk', flat=True).exclude(kind__in=['topic', ''])
count = min(pks.count(), 25)
pks = queryset.values_list('pk', flat=True).exclude(kind=content_kinds.TOPIC)
# .count scales with table size, so can get slow on larger channels
count_cache_key = 'content_count_for_{}'.format(get_active_content_database())
count = cache.get(count_cache_key) or min(pks.count(), 25)
return queryset.filter(pk__in=sample(list(pks), count))

cache_key = 'popular_for_{}'.format(get_active_content_database())
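
The fallback branch reads a precomputed node count from the cache before paying for `.count()`; only the read side appears in this hunk. The general read-through pattern, sketched in plain Python with a dict standing in for Django's cache backend (names are illustrative, not from this commit):

    cache = {}  # dict standing in for the cache backend

    def cached_count(key, compute_count):
        count = cache.get(key)
        if count is None:            # miss: pay for the expensive COUNT once
            count = compute_count()
            cache[key] = count
        return count

    print(cached_count('content_count_for_demo', lambda: 25))  # computed
    print(cached_count('content_count_for_demo', lambda: 99))  # cached: 25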
@@ -156,6 +143,10 @@ def filter_resume(self, queryset, value):
             .values_list('content_id', flat=True) \
             .distinct()

+        # If no logs, don't bother doing the other queries
+        if not content_ids:
+            return queryset.none()
+
         resume = queryset.filter(content_id__in=list(content_ids[:10]))

         return resume
@@ -196,9 +187,7 @@ def get_queryset(self):
         return models.ContentNode.objects.all().prefetch_related(
             'assessmentmetadata',
             'files',
-        ).select_related(
-            'license',
-        )
+        ).select_related('license')

     @detail_route(methods=['get'])
     def descendants(self, request, **kwargs):
6 changes: 3 additions & 3 deletions kolibri/content/test/test_content_app.py
@@ -268,9 +268,9 @@ def test_contentnode_field_filtering(self):
         self.assertTrue("pk" not in response.data)

     def test_contentnode_recommendations(self):
-        root_id = content.ContentNode.objects.get(title="root").id
-        response = self.client.get(self._reverse_channel_url("contentnode-list"), data={"recommendations_for": root_id})
-        self.assertEqual(len(response.data), 4)
+        id = content.ContentNode.objects.get(title="c2c2").id
+        response = self.client.get(self._reverse_channel_url("contentnode-list"), data={"recommendations_for": id})
+        self.assertEqual(len(response.data), 2)

     def test_channelmetadata_list(self):
         data = content.ChannelMetadata.objects.values()[0]
10 changes: 7 additions & 3 deletions kolibri/plugins/learn/assets/src/state/actions.js
@@ -16,6 +16,7 @@ const seededShuffle = require('kolibri.lib.seededshuffle');
 const { createQuestionList, selectQuestionFromExercise } = require('kolibri.utils.exams');
 const { assessmentMetaDataState } = require('kolibri.coreVue.vuex.mappers');
 const { now } = require('kolibri.utils.serverClock');
+const uniqBy = require('lodash/uniqBy');

 /**
  * Vuex State Mappers
@@ -295,9 +296,12 @@ function showLearnChannel(store, channelId, page = 1) {
     ([nextSteps, popular, resume]) => {
       const pageState = {
         recommendations: {
-          nextSteps: nextSteps.map(_contentState),
-          popular: popular.map(_contentState),
-          resume: resume.map(_contentState),
+          // Hard to guarantee this uniqueness on the database side, so
+          // do a uniqBy content_id here, to prevent confusing repeated
+          // content items.
+          nextSteps: uniqBy(nextSteps, 'content_id').map(_contentState),
+          popular: uniqBy(popular, 'content_id').map(_contentState),
+          resume: uniqBy(resume, 'content_id').map(_contentState),
         },
         all: {
           content: [],
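
lodash's `uniqBy` keeps the first element seen for each key, so repeated content items collapse to one card per `content_id`. A rough Python equivalent of that dedupe step, for readers more at home on the server side (hypothetical sketch, not code from this commit):

    def uniq_by(items, key):
        # Keep the first item seen for each value of item[key].
        seen = set()
        unique = []
        for item in items:
            if item[key] not in seen:
                seen.add(item[key])
                unique.append(item)
        return unique

    next_steps = [{"content_id": "abc", "title": "Fractions"},
                  {"content_id": "abc", "title": "Fractions"}]
    print(uniq_by(next_steps, "content_id"))  # -> one entry survives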
