From 92552e50d431292e35ad08688777e84d3dc9a23d Mon Sep 17 00:00:00 2001 From: Kyle McCormick Date: Thu, 20 Jan 2022 18:46:59 -0500 Subject: [PATCH 1/7] refactor: move coursegraph to cms This code was originally located at: ./openedx/core/djangoapps/coursegraph However, code makes more sense within the ./cms tree, because: * it is responsible for publishing course content to an external system, with is within the responsibilities of CMS, and * is uses modulestore, which is discouraged for use in LMS (see 0011-limit-modulestore-use-in-lms.rst). So, we move the code to: ./cms/djangoapps/coursegraph and uninstall coursegraph from LMS. We do not expect this refactor to have any breaking downstream effects. --- .github/workflows/unit-test-shards.json | 3 +- .../djangoapps/coursegraph/README.rst | 0 .../djangoapps/coursegraph/__init__.py | 0 .../djangoapps/coursegraph/apps.py | 4 +- .../coursegraph/management/__init__.py | 0 .../management/commands/__init__.py | 0 .../management/commands/dump_to_neo4j.py | 2 +- .../management/commands/tests/__init__.py | 0 .../commands/tests/test_dump_to_neo4j.py | 44 +++++++++---------- .../management/commands/tests/utils.py | 0 .../djangoapps/coursegraph/tasks.py | 0 cms/envs/common.py | 2 +- cms/envs/production.py | 2 +- lms/envs/common.py | 4 -- 14 files changed, 28 insertions(+), 33 deletions(-) rename {openedx/core => cms}/djangoapps/coursegraph/README.rst (100%) rename {openedx/core => cms}/djangoapps/coursegraph/__init__.py (100%) rename {openedx/core => cms}/djangoapps/coursegraph/apps.py (65%) rename {openedx/core => cms}/djangoapps/coursegraph/management/__init__.py (100%) rename {openedx/core => cms}/djangoapps/coursegraph/management/commands/__init__.py (100%) rename {openedx/core => cms}/djangoapps/coursegraph/management/commands/dump_to_neo4j.py (97%) rename {openedx/core => cms}/djangoapps/coursegraph/management/commands/tests/__init__.py (100%) rename {openedx/core => cms}/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py (91%) rename {openedx/core => cms}/djangoapps/coursegraph/management/commands/tests/utils.py (100%) rename {openedx/core => cms}/djangoapps/coursegraph/tasks.py (100%) diff --git a/.github/workflows/unit-test-shards.json b/.github/workflows/unit-test-shards.json index f7cd11bf2c34..838d0fe261f2 100644 --- a/.github/workflows/unit-test-shards.json +++ b/.github/workflows/unit-test-shards.json @@ -99,7 +99,6 @@ "openedx/core/djangoapps/course_apps/", "openedx/core/djangoapps/course_date_signals/", "openedx/core/djangoapps/course_groups/", - "openedx/core/djangoapps/coursegraph/", "openedx/core/djangoapps/courseware_api/", "openedx/core/djangoapps/crawlers/", "openedx/core/djangoapps/credentials/", @@ -181,7 +180,6 @@ "openedx/core/djangoapps/course_apps/", "openedx/core/djangoapps/course_date_signals/", "openedx/core/djangoapps/course_groups/", - "openedx/core/djangoapps/coursegraph/", "openedx/core/djangoapps/courseware_api/", "openedx/core/djangoapps/crawlers/", "openedx/core/djangoapps/credentials/", @@ -240,6 +238,7 @@ "paths": [ "cms/djangoapps/api/", "cms/djangoapps/cms_user_tasks/", + "cms/djangoapps/coursegraph/", "cms/djangoapps/course_creators/", "cms/djangoapps/export_course_metadata/", "cms/djangoapps/maintenance/", diff --git a/openedx/core/djangoapps/coursegraph/README.rst b/cms/djangoapps/coursegraph/README.rst similarity index 100% rename from openedx/core/djangoapps/coursegraph/README.rst rename to cms/djangoapps/coursegraph/README.rst diff --git a/openedx/core/djangoapps/coursegraph/__init__.py b/cms/djangoapps/coursegraph/__init__.py similarity index 100% rename from openedx/core/djangoapps/coursegraph/__init__.py rename to cms/djangoapps/coursegraph/__init__.py diff --git a/openedx/core/djangoapps/coursegraph/apps.py b/cms/djangoapps/coursegraph/apps.py similarity index 65% rename from openedx/core/djangoapps/coursegraph/apps.py rename to cms/djangoapps/coursegraph/apps.py index ecedf33d19ca..95d7873fce46 100644 --- a/openedx/core/djangoapps/coursegraph/apps.py +++ b/cms/djangoapps/coursegraph/apps.py @@ -12,6 +12,6 @@ class CoursegraphConfig(AppConfig): """ AppConfig for courseware app """ - name = 'openedx.core.djangoapps.coursegraph' + name = 'cms.djangoapps.coursegraph' - from openedx.core.djangoapps.coursegraph import tasks + from cms.djangoapps.coursegraph import tasks diff --git a/openedx/core/djangoapps/coursegraph/management/__init__.py b/cms/djangoapps/coursegraph/management/__init__.py similarity index 100% rename from openedx/core/djangoapps/coursegraph/management/__init__.py rename to cms/djangoapps/coursegraph/management/__init__.py diff --git a/openedx/core/djangoapps/coursegraph/management/commands/__init__.py b/cms/djangoapps/coursegraph/management/commands/__init__.py similarity index 100% rename from openedx/core/djangoapps/coursegraph/management/commands/__init__.py rename to cms/djangoapps/coursegraph/management/commands/__init__.py diff --git a/openedx/core/djangoapps/coursegraph/management/commands/dump_to_neo4j.py b/cms/djangoapps/coursegraph/management/commands/dump_to_neo4j.py similarity index 97% rename from openedx/core/djangoapps/coursegraph/management/commands/dump_to_neo4j.py rename to cms/djangoapps/coursegraph/management/commands/dump_to_neo4j.py index 226b3c174a7a..5fcaefafcbd7 100644 --- a/openedx/core/djangoapps/coursegraph/management/commands/dump_to_neo4j.py +++ b/cms/djangoapps/coursegraph/management/commands/dump_to_neo4j.py @@ -9,7 +9,7 @@ from django.core.management.base import BaseCommand -from openedx.core.djangoapps.coursegraph.tasks import ModuleStoreSerializer +from cms.djangoapps.coursegraph.tasks import ModuleStoreSerializer log = logging.getLogger(__name__) diff --git a/openedx/core/djangoapps/coursegraph/management/commands/tests/__init__.py b/cms/djangoapps/coursegraph/management/commands/tests/__init__.py similarity index 100% rename from openedx/core/djangoapps/coursegraph/management/commands/tests/__init__.py rename to cms/djangoapps/coursegraph/management/commands/tests/__init__.py diff --git a/openedx/core/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py b/cms/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py similarity index 91% rename from openedx/core/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py rename to cms/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py index 0d03f56d5fc5..37d2dfaf66e3 100644 --- a/openedx/core/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py +++ b/cms/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py @@ -13,10 +13,10 @@ from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory import openedx.core.djangoapps.content.block_structure.config as block_structure_config -from openedx.core.djangoapps.content.block_structure.signals import update_block_structure_on_course_publish -from openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j import ModuleStoreSerializer -from openedx.core.djangoapps.coursegraph.management.commands.tests.utils import MockGraph, MockNodeMatcher -from openedx.core.djangoapps.coursegraph.tasks import ( +from cms.djangoapps.content.block_structure.signals import update_block_structure_on_course_publish +from cms.djangoapps.coursegraph.management.commands.dump_to_neo4j import ModuleStoreSerializer +from cms.djangoapps.coursegraph.management.commands.tests.utils import MockGraph, MockNodeMatcher +from cms.djangoapps.coursegraph.tasks import ( coerce_types, serialize_course, serialize_item, @@ -115,8 +115,8 @@ class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase): Tests for the dump to neo4j management command """ - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.Graph') + @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') + @mock.patch('cms.djangoapps.coursegraph.tasks.Graph') @ddt.data(1, 2) def test_dump_specific_courses(self, number_of_courses, mock_graph_class, mock_matcher_class): """ @@ -140,8 +140,8 @@ def test_dump_specific_courses(self, number_of_courses, mock_graph_class, mock_m number_rollbacks=0 ) - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.Graph') + @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') + @mock.patch('cms.djangoapps.coursegraph.tasks.Graph') def test_dump_skip_course(self, mock_graph_class, mock_matcher_class): """ Test that you can skip courses. @@ -166,8 +166,8 @@ def test_dump_skip_course(self, mock_graph_class, mock_matcher_class): number_rollbacks=0, ) - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.Graph') + @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') + @mock.patch('cms.djangoapps.coursegraph.tasks.Graph') def test_dump_skip_beats_specifying(self, mock_graph_class, mock_matcher_class): """ Test that if you skip and specify the same course, you'll skip it. @@ -193,8 +193,8 @@ def test_dump_skip_beats_specifying(self, mock_graph_class, mock_matcher_class): number_rollbacks=0, ) - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.Graph') + @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') + @mock.patch('cms.djangoapps.coursegraph.tasks.Graph') def test_dump_all_courses(self, mock_graph_class, mock_matcher_class): """ Test if you don't specify which courses to dump, then you'll dump @@ -395,8 +395,8 @@ def test_coerce_types(self, original_value, coerced_expected): coerced_value = coerce_types(original_value) assert coerced_value == coerced_expected - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.authenticate_and_create_graph') + @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') + @mock.patch('cms.djangoapps.coursegraph.tasks.authenticate_and_create_graph') def test_dump_to_neo4j(self, mock_graph_constructor, mock_matcher_class): """ Tests the dump_to_neo4j method works against a mock @@ -423,8 +423,8 @@ def test_dump_to_neo4j(self, mock_graph_constructor, mock_matcher_class): assert len(mock_graph.nodes) == 11 self.assertCountEqual(submitted, self.course_strings) - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.authenticate_and_create_graph') + @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') + @mock.patch('cms.djangoapps.coursegraph.tasks.authenticate_and_create_graph') def test_dump_to_neo4j_rollback(self, mock_graph_constructor, mock_matcher_class): """ Tests that the the dump_to_neo4j method handles the case where there's @@ -447,8 +447,8 @@ def test_dump_to_neo4j_rollback(self, mock_graph_constructor, mock_matcher_class self.assertCountEqual(submitted, self.course_strings) - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.authenticate_and_create_graph') + @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') + @mock.patch('cms.djangoapps.coursegraph.tasks.authenticate_and_create_graph') @ddt.data((True, 2), (False, 0)) @ddt.unpack def test_dump_to_neo4j_cache( @@ -480,8 +480,8 @@ def test_dump_to_neo4j_cache( ) assert len(submitted) == expected_number_courses - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.authenticate_and_create_graph') + @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') + @mock.patch('cms.djangoapps.coursegraph.tasks.authenticate_and_create_graph') def test_dump_to_neo4j_published(self, mock_graph_constructor, mock_matcher_class): """ Tests that we only dump those courses that have been published after @@ -506,8 +506,8 @@ def test_dump_to_neo4j_published(self, mock_graph_constructor, mock_matcher_clas assert len(submitted) == 1 assert submitted[0] == str(self.course.id) - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.get_course_last_published') - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.get_command_last_run') + @mock.patch('cms.djangoapps.coursegraph.tasks.get_course_last_published') + @mock.patch('cms.djangoapps.coursegraph.tasks.get_command_last_run') @ddt.data( ( str(datetime(2016, 3, 30)), str(datetime(2016, 3, 31)), diff --git a/openedx/core/djangoapps/coursegraph/management/commands/tests/utils.py b/cms/djangoapps/coursegraph/management/commands/tests/utils.py similarity index 100% rename from openedx/core/djangoapps/coursegraph/management/commands/tests/utils.py rename to cms/djangoapps/coursegraph/management/commands/tests/utils.py diff --git a/openedx/core/djangoapps/coursegraph/tasks.py b/cms/djangoapps/coursegraph/tasks.py similarity index 100% rename from openedx/core/djangoapps/coursegraph/tasks.py rename to cms/djangoapps/coursegraph/tasks.py diff --git a/cms/envs/common.py b/cms/envs/common.py index 2cad881cec78..31e73c2559d9 100644 --- a/cms/envs/common.py +++ b/cms/envs/common.py @@ -1611,7 +1611,7 @@ 'openedx.core.djangoapps.self_paced', # Coursegraph - 'openedx.core.djangoapps.coursegraph.apps.CoursegraphConfig', + 'cms.djangoapps.coursegraph.apps.CoursegraphConfig', # Credit courses 'openedx.core.djangoapps.credit.apps.CreditConfig', diff --git a/cms/envs/production.py b/cms/envs/production.py index 8b29e3d3f378..82244acbde9c 100644 --- a/cms/envs/production.py +++ b/cms/envs/production.py @@ -603,7 +603,7 @@ def get_env_setting(setting): 'queue': POLICY_CHANGE_GRADES_ROUTING_KEY}, 'cms.djangoapps.contentstore.tasks.update_search_index': { 'queue': UPDATE_SEARCH_INDEX_JOB_QUEUE}, - 'openedx.core.djangoapps.coursegraph.tasks.dump_course_to_neo4j': { + 'cms.djangoapps.coursegraph.tasks.dump_course_to_neo4j': { 'queue': COURSEGRAPH_JOB_QUEUE}, } diff --git a/lms/envs/common.py b/lms/envs/common.py index 83709b11db4b..bf81a9769ecb 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -3083,10 +3083,6 @@ def _make_locale_paths(settings): # pylint: disable=missing-function-docstring 'openedx.core.djangoapps.content.block_structure.apps.BlockStructureConfig', 'lms.djangoapps.course_blocks', - - # Coursegraph - 'openedx.core.djangoapps.coursegraph.apps.CoursegraphConfig', - # Mailchimp Syncing 'lms.djangoapps.mailing', From d8c6e81459dd0c3aaf40dfeaae505162851c6670 Mon Sep 17 00:00:00 2001 From: Kyle McCormick Date: Thu, 20 Jan 2022 18:30:41 -0500 Subject: [PATCH 2/7] docs: enrich dump_to_neo4j help text & docstrings Move most docs out of docstring and into programatically- displayable argument help text. Also, the 'Example Usage' was out of date. This commit updates it to: * use `./manage.py cms ...' instead of `./manage.py lms ...', and * use `--port` instead of `--https_port`. --- .../management/commands/dump_to_neo4j.py | 80 +++++++++++++------ 1 file changed, 54 insertions(+), 26 deletions(-) diff --git a/cms/djangoapps/coursegraph/management/commands/dump_to_neo4j.py b/cms/djangoapps/coursegraph/management/commands/dump_to_neo4j.py index 5fcaefafcbd7..6da97b4a22b9 100644 --- a/cms/djangoapps/coursegraph/management/commands/dump_to_neo4j.py +++ b/cms/djangoapps/coursegraph/management/commands/dump_to_neo4j.py @@ -1,6 +1,17 @@ """ This file contains a management command for exporting the modulestore to -neo4j, a graph database. +Neo4j, a graph database. + +Example usages: + + # Dump all courses published since last dump. + python manage.py cms dump_to_neo4j --host localhost --port 7473 \ + --secure --user user --password password + + # Specify certain courses instead of dumping all of them. + python manage.py cms dump_to_neo4j --host localhost --port 7473 \ + --secure --user user --password password \ + --courses 'course-v1:A+B+1' 'course-v1:A+B+2' """ @@ -16,38 +27,55 @@ class Command(BaseCommand): """ - Command to dump modulestore data to neo4j - - Takes the following named arguments: - host: the host of the neo4j server - port: the port on the neo4j server that accepts Bolt requests - secure: if set, connects to server over Bolt/TLS, otherwise uses Bolt - user: the username for the neo4j user - password: the user's password - courses: list of course key strings to serialize. If not specified, all - courses in the modulestore are serialized. - override: if true, dump all--or all specified--courses, regardless of when - they were last dumped. If false, or not set, only dump those courses that - were updated since the last time the command was run. - - Example usage: - python manage.py lms dump_to_neo4j --host localhost --https_port 7473 \ - --secure --user user --password password --settings=production + Dump recently-published course(s) over to a CourseGraph (Neo4j) instance. """ help = dedent(__doc__).strip() def add_arguments(self, parser): - parser.add_argument('--host', type=str) - parser.add_argument('--port', type=int, default=7687) - parser.add_argument('--secure', action='store_true') - parser.add_argument('--user', type=str) - parser.add_argument('--password', type=str) - parser.add_argument('--courses', type=str, nargs='*') - parser.add_argument('--skip', type=str, nargs='*') + parser.add_argument( + '--host', + type=str, + help="the hostname of the Neo4j server", + ) + parser.add_argument( + '--port', + type=int, + default=7687, + help="the port on the Neo4j server that accepts Bolt requests", + ) + parser.add_argument( + '--secure', + action='store_true', + help="connect to server over Bolt/TLS instead of plain unencrypted Bolt", + ) + parser.add_argument( + '--user', + type=str, + help="the username of the Neo4j user", + ) + parser.add_argument( + '--password', + type=str, + help="the password of the Neo4j user", + ) + parser.add_argument( + '--courses', + metavar='KEY', + type=str, + nargs='*', + help="keys of courses to serialize; if omitted all courses in system are serialized", + ) + parser.add_argument( + '--skip', + metavar='KEY', + type=str, + nargs='*', + help="keys of courses to NOT to serialize", + ) parser.add_argument( '--override', action='store_true', - help='dump all--or all specified--courses, ignoring cache', + help="dump all courses regardless of when they were last published", ) def handle(self, *args, **options): From 1935712bdca1963815c1d605a6121217241f641f Mon Sep 17 00:00:00 2001 From: Kyle McCormick Date: Thu, 20 Jan 2022 18:41:34 -0500 Subject: [PATCH 3/7] feat: specify dump_to_neo4j defaults in COURSEGRAPH_CONNECTION Introduce a new CMS settings COURSEGRAPH_CONNECTION, which allows operators to specify default connection paramters for a Neo4j instance. This has three purposes: * The `./manage.py cms dump_to_neo4j` management command will be much easier for developers and operators to type out because connection arguments can now be omitted. Note that connection arguments, if supplied, will override the arguments specified in CMS settings. * The automatic push-to-coursegraph-on-publish-signal introduced in subsequent commits can use these connection settings. * The CourseGraph Django admin actions introduced in subsequent commits can use these connection settings. --- .../management/commands/dump_to_neo4j.py | 19 ++++-- .../commands/tests/test_dump_to_neo4j.py | 40 ++++++++++++- cms/djangoapps/coursegraph/tasks.py | 59 +++++++++---------- cms/envs/common.py | 18 +++++- cms/envs/devstack.py | 11 ++++ 5 files changed, 109 insertions(+), 38 deletions(-) diff --git a/cms/djangoapps/coursegraph/management/commands/dump_to_neo4j.py b/cms/djangoapps/coursegraph/management/commands/dump_to_neo4j.py index 6da97b4a22b9..40afe7ffbe7d 100644 --- a/cms/djangoapps/coursegraph/management/commands/dump_to_neo4j.py +++ b/cms/djangoapps/coursegraph/management/commands/dump_to_neo4j.py @@ -5,13 +5,17 @@ Example usages: # Dump all courses published since last dump. + # Use connection parameters from `settings.COURSEGRAPH_SETTINGS`. + python manage.py cms dump_to_neo4j + + # Dump all courses published since last dump. + # Use custom connection parameters. python manage.py cms dump_to_neo4j --host localhost --port 7473 \ --secure --user user --password password # Specify certain courses instead of dumping all of them. - python manage.py cms dump_to_neo4j --host localhost --port 7473 \ - --secure --user user --password password \ - --courses 'course-v1:A+B+1' 'course-v1:A+B+2' + # Use connection parameters from `settings.COURSEGRAPH_SETTINGS`. + python manage.py cms dump_to_neo4j --courses 'course-v1:A+B+1' 'course-v1:A+B+2' """ @@ -40,7 +44,6 @@ def add_arguments(self, parser): parser.add_argument( '--port', type=int, - default=7687, help="the port on the Neo4j server that accepts Bolt requests", ) parser.add_argument( @@ -85,9 +88,13 @@ def handle(self, *args, **options): """ mss = ModuleStoreSerializer.create(options['courses'], options['skip']) - + connection_overrides = { + key: options[key] + for key in ["host", "port", "secure", "user", "password"] + } submitted_courses, skipped_courses = mss.dump_courses_to_neo4j( - options, override_cache=options['override'] + connection_overrides=connection_overrides, + override_cache=options['override'], ) log.info( diff --git a/cms/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py b/cms/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py index 37d2dfaf66e3..dd07e0f08a79 100644 --- a/cms/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py +++ b/cms/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py @@ -8,12 +8,13 @@ from unittest import mock import ddt from django.core.management import call_command +from django.test.utils import override_settings from edx_toggles.toggles.testutils import override_waffle_switch from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory import openedx.core.djangoapps.content.block_structure.config as block_structure_config -from cms.djangoapps.content.block_structure.signals import update_block_structure_on_course_publish +from openedx.core.djangoapps.content.block_structure.signals import update_block_structure_on_course_publish from cms.djangoapps.coursegraph.management.commands.dump_to_neo4j import ModuleStoreSerializer from cms.djangoapps.coursegraph.management.commands.tests.utils import MockGraph, MockNodeMatcher from cms.djangoapps.coursegraph.tasks import ( @@ -219,6 +220,43 @@ def test_dump_all_courses(self, mock_graph_class, mock_matcher_class): number_rollbacks=0, ) + @mock.patch('cms.djangoapps.coursegraph.tasks.Graph', autospec=True) + @override_settings( + COURSEGRAPH_CONNECTION=dict( + protocol='bolt', + host='coursegraph.example.edu', + port=7777, + secure=True, + user="neo4j", + password="default-password", + ) + ) + def test_dump_to_neo4j_connection_defaults(self, mock_graph_class): + """ + Test that user can override individual settings.COURSEGRAPH_CONNECTION parameters + by passing them to `dump_to_neo4j`, whilst falling back to the ones that they + don't override. + """ + call_command( + 'dump_to_neo4j', + courses=self.course_strings[:1], + port=7788, + secure=False, + password="overridden-password", + ) + mock_graph_class.assert_called_once_with( + + # From settings: + protocol='bolt', + host='coursegraph.example.edu', + user="neo4j", + + # Overriden by command: + port=7788, + secure=False, + password="overridden-password", + ) + class SomeThing: """Just to test the stringification of an object.""" diff --git a/cms/djangoapps/coursegraph/tasks.py b/cms/djangoapps/coursegraph/tasks.py index ec943e752764..2dc359974cf3 100644 --- a/cms/djangoapps/coursegraph/tasks.py +++ b/cms/djangoapps/coursegraph/tasks.py @@ -7,6 +7,7 @@ import logging from celery import shared_task +from django.conf import settings from django.utils import timezone from edx_django_utils.cache import RequestCache from edx_django_utils.monitoring import set_code_owner_attribute @@ -268,14 +269,14 @@ def should_dump_course(course_key, graph): @shared_task @set_code_owner_attribute -def dump_course_to_neo4j(course_key_string, credentials): +def dump_course_to_neo4j(course_key_string, connection_overrides=None): """ Serializes a course and writes it to neo4j. Arguments: - course_key: course key for the course to be exported - credentials (dict): the necessary credentials to connect - to neo4j and create a py2neo `Graph` obje + course_key_string: course key for the course to be exported + connection_overrides (dict): overrides to Neo4j connection + parameters specified in `settings.COURSEGRAPH_CONNECTION`. """ course_key = CourseKey.from_string(course_key_string) nodes, relationships = serialize_course(course_key) @@ -286,7 +287,9 @@ def dump_course_to_neo4j(course_key_string, credentials): len(relationships), ) - graph = authenticate_and_create_graph(credentials) + graph = authenticate_and_create_graph( + connection_overrides=connection_overrides + ) transaction = graph.begin() course_string = str(course_key) @@ -346,13 +349,13 @@ def create(cls, courses=None, skip=None): course_keys = [course_key for course_key in course_keys if course_key not in skip_keys] return cls(course_keys) - def dump_courses_to_neo4j(self, credentials, override_cache=False): + def dump_courses_to_neo4j(self, connection_overrides=None, override_cache=False): """ Method that iterates through a list of courses in a modulestore, serializes them, then submits tasks to write them to neo4j. Arguments: - credentials (dict): the necessary credentials to connect - to neo4j and create a py2neo `Graph` object + connection_overrides (dict): overrides to Neo4j connection + parameters specified in `settings.COURSEGRAPH_CONNECTION`. override_cache: serialize the courses even if they'be been recently serialized @@ -365,7 +368,7 @@ def dump_courses_to_neo4j(self, credentials, override_cache=False): submitted_courses = [] skipped_courses = [] - graph = authenticate_and_create_graph(credentials) + graph = authenticate_and_create_graph(connection_overrides) for index, course_key in enumerate(self.course_keys): # first, clear the request cache to prevent memory leaks @@ -389,36 +392,32 @@ def dump_courses_to_neo4j(self, credentials, override_cache=False): ) dump_course_to_neo4j.apply_async( - args=[str(course_key), credentials], + kwargs=dict( + course_key_string=str(course_key), + connection_overrides=connection_overrides, + ) ) submitted_courses.append(str(course_key)) return submitted_courses, skipped_courses -def authenticate_and_create_graph(credentials): +def authenticate_and_create_graph(connection_overrides=None): """ This function authenticates with neo4j and creates a py2neo graph object + Arguments: - credentials (dict): a dictionary of credentials used to authenticate, - and then create, a py2neo graph object. + connection_overrides (dict): overrides to Neo4j connection + parameters specified in `settings.COURSEGRAPH_CONNECTION`. Returns: a py2neo `Graph` object. """ - - host = credentials['host'] - port = credentials['port'] - secure = credentials['secure'] - neo4j_user = credentials['user'] - neo4j_password = credentials['password'] - - graph = Graph( - protocol='bolt', - password=neo4j_password, - user=neo4j_user, - address=host, - port=port, - secure=secure, - ) - - return graph + provided_overrides = { + key: value + for key, value in (connection_overrides or {}).items() + # Drop overrides whose values are `None`. Note that `False` is a + # legitimate override value that we don't want to drop here. + if value is not None + } + connection_with_overrides = {**settings.COURSEGRAPH_CONNECTION, **provided_overrides} + return Graph(**connection_with_overrides) diff --git a/cms/envs/common.py b/cms/envs/common.py index 31e73c2559d9..91cbbbc2156b 100644 --- a/cms/envs/common.py +++ b/cms/envs/common.py @@ -2235,7 +2235,23 @@ # .. setting_default: value of LOW_PRIORITY_QUEUE # .. setting_description: The name of the Celery queue to which CourseGraph refresh # tasks will be sent -COURSEGRAPH_JOB_QUEUE = LOW_PRIORITY_QUEUE +COURSEGRAPH_JOB_QUEUE: str = LOW_PRIORITY_QUEUE + +# .. setting_name: COURSEGRAPH_CONNECTION +# .. setting_default: 'bolt+s://localhost:7687', in dictionary form. +# .. setting_description: Dictionary specifying Neo4j connection parameters for +# CourseGraph refresh. Accepted keys are protocol ('bolt' or 'http'), +# secure (bool), host (str), port (int), user (str), and password (str). +# See https://py2neo.org/2021.1/profiles.html#individual-settings for a +# a description of each of those keys. +COURSEGRAPH_CONNECTION: dict = { + "protocol": "bolt", + "secure": True, + "host": "localhost", + "port": 7687, + "user": "neo4j", + "password": None, +} ########## Settings for video transcript migration tasks ############ VIDEO_TRANSCRIPT_MIGRATIONS_JOB_QUEUE = DEFAULT_PRIORITY_QUEUE diff --git a/cms/envs/devstack.py b/cms/envs/devstack.py index 6b632ddaf11f..6c67d4d3891e 100644 --- a/cms/envs/devstack.py +++ b/cms/envs/devstack.py @@ -256,6 +256,17 @@ def should_show_debug_toolbar(request): # lint-amnesty, pylint: disable=missing # (ref MST-637) PROCTORING_USER_OBFUSCATION_KEY = '85920908f28904ed733fe576320db18cabd7b6cd' +############## CourseGraph devstack settings ############################ + +COURSEGRAPH_CONNECTION: dict = { + "protocol": "bolt", + "secure": False, + "host": "edx.devstack.coursegraph", + "port": 7687, + "user": "neo4j", + "password": "edx", +} + #################### Webpack Configuration Settings ############################## WEBPACK_LOADER['DEFAULT']['TIMEOUT'] = 5 From a92b09e08e4937727c8fd5d371dc10924ed696f1 Mon Sep 17 00:00:00 2001 From: Kyle McCormick Date: Mon, 24 Jan 2022 11:51:56 -0500 Subject: [PATCH 4/7] feat: dump to cousegraph on course publish signal Previously, CourseGraph needed to be kept up-to-date by running `./manage.py dump_to_neo4j ...` manually or on a cron timer. This introduces a CMS new setting: COURSEGRAPH_DUMP_COURSE_ON_PUBLISH. When enabled, the CMS course_published signal handler will asynchronously dump each individual course to CourseGraph when it is published. This follows a pattern established by other subsystems like learning_sequences and special exam registration, both of which fire off asynchronous post-processing tasks from the course- publish handler. --- cms/djangoapps/contentstore/signals/handlers.py | 8 ++++++++ cms/envs/common.py | 10 ++++++++++ 2 files changed, 18 insertions(+) diff --git a/cms/djangoapps/contentstore/signals/handlers.py b/cms/djangoapps/contentstore/signals/handlers.py index a67f9583814a..9945277a1379 100644 --- a/cms/djangoapps/contentstore/signals/handlers.py +++ b/cms/djangoapps/contentstore/signals/handlers.py @@ -5,6 +5,7 @@ from datetime import datetime from functools import wraps +from django.conf import settings from django.core.cache import cache from django.dispatch import receiver from pytz import UTC @@ -55,6 +56,9 @@ def listen_for_course_publish(sender, course_key, **kwargs): # pylint: disable= update_search_index, update_special_exams_and_publish ) + from cms.djangoapps.coursegraph.tasks import ( + dump_course_to_neo4j + ) # register special exams asynchronously course_key_str = str(course_key) @@ -64,6 +68,10 @@ def listen_for_course_publish(sender, course_key, **kwargs): # pylint: disable= # Push the course outline to learning_sequences asynchronously. update_outline_from_modulestore_task.delay(course_key_str) + if settings.COURSEGRAPH_DUMP_COURSE_ON_PUBLISH: + # Push the course out to CourseGraph asynchronously. + dump_course_to_neo4j.delay(course_key_str) + # Finally, call into the course search subsystem # to kick off an indexing action if CoursewareSearchIndexer.indexing_is_enabled() and CourseAboutSearchIndexer.indexing_is_enabled(): diff --git a/cms/envs/common.py b/cms/envs/common.py index 91cbbbc2156b..759f474d68e9 100644 --- a/cms/envs/common.py +++ b/cms/envs/common.py @@ -2253,6 +2253,16 @@ "password": None, } +# .. toggle_name: COURSEGRAPH_DUMP_COURSE_ON_PUBLISH +# .. toggle_implementation: DjangoSetting +# .. toggle_creation_date: 2022-01-27 +# .. toggle_use_cases: open_edx +# .. toggle_default: False +# .. toggle_description: Whether, upon publish, a course should automatically +# be exported to Neo4j via the connection parameters specified in +# `COURSEGRAPH_CONNECTION`. +COURSEGRAPH_DUMP_COURSE_ON_PUBLISH: bool = False + ########## Settings for video transcript migration tasks ############ VIDEO_TRANSCRIPT_MIGRATIONS_JOB_QUEUE = DEFAULT_PRIORITY_QUEUE From 65d7449e17efbc80a9dd284e892c21dba07900ac Mon Sep 17 00:00:00 2001 From: Kyle McCormick Date: Mon, 7 Feb 2022 16:13:53 -0500 Subject: [PATCH 5/7] refactor: read course publish date from overview, not block structure The `get_course_last_published` function is used by CourseGraph to determine whether or not a course should be dumped to Neo4j. If the course hasn't been published since it was last dumped to Neo4j, then it can be skipped (unless the override_cache option is enabled). The function was previously built using the BlockStructure data model. While this worked fine in Production instances that enable `block_structure.storage_backing_for_cache`, this implementation did NOT work in development environments, which do not use the BlockStrcture model. Instead, we switch to using CourseOverview.modified to approximate when a course was last published. This is method has fewer moving parts and is universally available across instances. --- .../commands/tests/test_dump_to_neo4j.py | 9 ++++-- cms/djangoapps/coursegraph/tasks.py | 31 +++++++++---------- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/cms/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py b/cms/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py index dd07e0f08a79..32fef0a88711 100644 --- a/cms/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py +++ b/cms/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py @@ -220,6 +220,7 @@ def test_dump_all_courses(self, mock_graph_class, mock_matcher_class): number_rollbacks=0, ) + @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') @mock.patch('cms.djangoapps.coursegraph.tasks.Graph', autospec=True) @override_settings( COURSEGRAPH_CONNECTION=dict( @@ -231,12 +232,15 @@ def test_dump_all_courses(self, mock_graph_class, mock_matcher_class): password="default-password", ) ) - def test_dump_to_neo4j_connection_defaults(self, mock_graph_class): + def test_dump_to_neo4j_connection_defaults(self, mock_graph_class, mock_matcher_class): """ Test that user can override individual settings.COURSEGRAPH_CONNECTION parameters by passing them to `dump_to_neo4j`, whilst falling back to the ones that they don't override. """ + self.setup_mock_graph( + mock_matcher_class, mock_graph_class + ) call_command( 'dump_to_neo4j', courses=self.course_strings[:1], @@ -244,7 +248,8 @@ def test_dump_to_neo4j_connection_defaults(self, mock_graph_class): secure=False, password="overridden-password", ) - mock_graph_class.assert_called_once_with( + assert mock_graph_class.call_args.args == () + assert mock_graph_class.call_args.kwargs == dict( # From settings: protocol='bolt', diff --git a/cms/djangoapps/coursegraph/tasks.py b/cms/djangoapps/coursegraph/tasks.py index 2dc359974cf3..e2d4bf5b0976 100644 --- a/cms/djangoapps/coursegraph/tasks.py +++ b/cms/djangoapps/coursegraph/tasks.py @@ -134,29 +134,26 @@ def get_command_last_run(course_key, graph): def get_course_last_published(course_key): """ - We use the CourseStructure table to get when this course was last - published. + Approximately when was a course last published? + + We use the 'modified' column in the CourseOverview table as a quick and easy + (although perhaps inexact) way of determining when a course was last + published. This works because CourseOverview rows are re-written upon + course publish. + Args: course_key: a CourseKey - Returns: The datetime the course was last published at, converted into - text, or None, if there's no record of the last time this course - was published. + Returns: The datetime the course was last published at, stringified. + Uses Python's default str(...) implementation for datetimes, which + is sortable and similar to ISO 8601: + https://docs.python.org/3/library/datetime.html#datetime.date.__str__ """ # Import is placed here to avoid model import at project startup. - from xmodule.modulestore.django import modulestore - from openedx.core.djangoapps.content.block_structure.models import BlockStructureModel - from openedx.core.djangoapps.content.block_structure.exceptions import BlockStructureNotFound - - store = modulestore() - course_usage_key = store.make_course_usage_key(course_key) - try: - structure = BlockStructureModel.get(course_usage_key) - course_last_published_date = str(structure.modified) - except BlockStructureNotFound: - course_last_published_date = None + from openedx.core.djangoapps.content.course_overviews.models import CourseOverview - return course_last_published_date + approx_last_published = CourseOverview.get_from_id(course_key).modified + return str(approx_last_published) def strip_branch_and_version(location): From a34305d4d37baa7d2cd44dc2653c602fe6a42208 Mon Sep 17 00:00:00 2001 From: Kyle McCormick Date: Mon, 24 Jan 2022 11:52:49 -0500 Subject: [PATCH 6/7] feat: add admin action for dump to coursegraph This introduces two admin actions: * Dump to CourseGraph (respect cache), and * Dump to CourseGraph (override cache) which allow admins to select a collection of courses from Django admin and dump them to the Neo4j instance specified by settings.COURSEGRAPH_CONNECTION, with or without respecting the cache (that is: whether the course has already been dumped since its last publishing). --- cms/djangoapps/coursegraph/admin.py | 123 ++++++++++ cms/djangoapps/coursegraph/models.py | 21 ++ cms/djangoapps/coursegraph/tests/__init__.py | 0 .../coursegraph/tests/test_admin.py | 227 ++++++++++++++++++ 4 files changed, 371 insertions(+) create mode 100644 cms/djangoapps/coursegraph/admin.py create mode 100644 cms/djangoapps/coursegraph/models.py create mode 100644 cms/djangoapps/coursegraph/tests/__init__.py create mode 100644 cms/djangoapps/coursegraph/tests/test_admin.py diff --git a/cms/djangoapps/coursegraph/admin.py b/cms/djangoapps/coursegraph/admin.py new file mode 100644 index 000000000000..f79fa909d286 --- /dev/null +++ b/cms/djangoapps/coursegraph/admin.py @@ -0,0 +1,123 @@ +""" +Admin site bindings for coursegraph +""" +import logging + +from django.contrib import admin, messages +from django.utils.translation import gettext as _ +from edx_django_utils.admin.mixins import ReadOnlyAdminMixin + +from .models import CourseGraphCourseDump +from .tasks import ModuleStoreSerializer + +log = logging.getLogger(__name__) + + +@admin.action( + permissions=['change'], + description=_("Dump courses to CourseGraph (respect cache)"), +) +def dump_courses(modeladmin, request, queryset): + """ + Admin action to enqueue Dump-to-CourseGraph tasks for a set of courses, + excluding courses that haven't been published since they were last dumped. + + queryset is a QuerySet of CourseGraphCourseDump objects, which are just + CourseOverview objects under the hood. + """ + all_course_keys = queryset.values_list('id', flat=True) + serializer = ModuleStoreSerializer(all_course_keys) + try: + submitted, skipped = serializer.dump_courses_to_neo4j() + # Unfortunately there is no unified base class for the reasonable + # exceptions we could expect from py2neo (connection unavailable, bolt protocol + # error, and so on), so we just catch broadly, show a generic error banner, + # and then log the exception for site operators to look at. + except Exception as err: # pylint: disable=broad-except + log.exception( + "Failed to enqueue CourseGraph dumps to Neo4j (respecting cache): %s", + ", ".join(str(course_key) for course_key in all_course_keys), + ) + modeladmin.message_user( + request, + _("Error enqueueing dumps for {} course(s): {}").format( + len(all_course_keys), str(err) + ), + level=messages.ERROR, + ) + return + if submitted: + modeladmin.message_user( + request, + _( + "Enqueued dumps for {} course(s). Skipped {} unchanged course(s)." + ).format(len(submitted), len(skipped)), + level=messages.SUCCESS, + ) + else: + modeladmin.message_user( + request, + _( + "Skipped all {} course(s), as they were unchanged.", + ).format(len(skipped)), + level=messages.WARNING, + ) + + +@admin.action( + permissions=['change'], + description=_("Dump courses to CourseGraph (override cache)") +) +def dump_courses_overriding_cache(modeladmin, request, queryset): + """ + Admin action to enqueue Dump-to-CourseGraph tasks for a set of courses + (whether or not they have been published recently). + + queryset is a QuerySet of CourseGraphCourseDump objects, which are just + CourseOverview objects under the hood. + """ + all_course_keys = queryset.values_list('id', flat=True) + serializer = ModuleStoreSerializer(all_course_keys) + try: + submitted, _skipped = serializer.dump_courses_to_neo4j(override_cache=True) + # Unfortunately there is no unified base class for the reasonable + # exceptions we could expect from py2neo (connection unavailable, bolt protocol + # error, and so on), so we just catch broadly, show a generic error banner, + # and then log the exception for site operators to look at. + except Exception as err: # pylint: disable=broad-except + log.exception( + "Failed to enqueue CourseGraph Neo4j course dumps (overriding cache): %s", + ", ".join(str(course_key) for course_key in all_course_keys), + ) + modeladmin.message_user( + request, + _("Error enqueueing dumps for {} course(s): {}").format( + len(all_course_keys), str(err) + ), + level=messages.ERROR, + ) + return + modeladmin.message_user( + request, + _("Enqueued dumps for {} course(s).").format(len(submitted)), + level=messages.SUCCESS, + ) + + +@admin.register(CourseGraphCourseDump) +class CourseGraphCourseDumpAdmin(ReadOnlyAdminMixin, admin.ModelAdmin): + """ + Model admin for "Course graph course dumps". + + Just a read-only table with some useful metadata, allowing admin users to + select courses to be dumped to CourseGraph. + """ + list_display = [ + 'id', + 'display_name', + 'modified', + 'enrollment_start', + 'enrollment_end', + ] + search_fields = ['id', 'display_name'] + actions = [dump_courses, dump_courses_overriding_cache] diff --git a/cms/djangoapps/coursegraph/models.py b/cms/djangoapps/coursegraph/models.py new file mode 100644 index 000000000000..f053dc9993ac --- /dev/null +++ b/cms/djangoapps/coursegraph/models.py @@ -0,0 +1,21 @@ +""" +(Proxy) models supporting CourseGraph. +""" + +from openedx.core.djangoapps.content.course_overviews.models import CourseOverview + + +class CourseGraphCourseDump(CourseOverview): + """ + Proxy model for CourseOverview. + + Does *not* create/update/delete CourseOverview objects - only reads the objects. + Uses the course IDs of the CourseOverview objects to determine which courses + can be dumped to CourseGraph. + """ + class Meta: + proxy = True + + def __str__(self): + """Represent ourselves with the course key.""" + return str(self.id) diff --git a/cms/djangoapps/coursegraph/tests/__init__.py b/cms/djangoapps/coursegraph/tests/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/cms/djangoapps/coursegraph/tests/test_admin.py b/cms/djangoapps/coursegraph/tests/test_admin.py new file mode 100644 index 000000000000..21a26d84505b --- /dev/null +++ b/cms/djangoapps/coursegraph/tests/test_admin.py @@ -0,0 +1,227 @@ +""" +Shallow tests for CourseGraph dump-queueing Django admin interface. + +See ..management.commands.tests.test_dump_to_neo4j for more comprehensive +tests of dump_course_to_neo4j. +""" + +from unittest import mock + +import py2neo +from django.test import TestCase +from django.test.utils import override_settings +from freezegun import freeze_time + +from openedx.core.djangoapps.content.course_overviews.tests.factories import CourseOverviewFactory +from openedx.core.djangoapps.content.course_overviews.models import CourseOverview + +from .. import admin, tasks + + +_coursegraph_connection = { + "protocol": "bolt", + "secure": True, + "host": "example.edu", + "port": 7687, + "user": "neo4j", + "password": "fake-coursegraph-password", +} + +_configure_coursegraph_connection = override_settings( + COURSEGRAPH_CONNECTION=_coursegraph_connection, +) + +_patch_log_exception = mock.patch.object( + admin.log, 'exception', autospec=True +) + +_patch_apply_dump_task = mock.patch.object( + tasks.dump_course_to_neo4j, 'apply_async' +) + +_pretend_last_course_dump_was_may_2020 = mock.patch.object( + tasks, + 'get_command_last_run', + new=(lambda _key, _graph: "2020-05-01"), +) + +_patch_neo4j_graph = mock.patch.object( + tasks, 'Graph', autospec=True +) + +_make_neo4j_graph_raise = mock.patch.object( + tasks, 'Graph', side_effect=py2neo.ConnectionUnavailable( + 'we failed to connect or something!' + ) +) + + +class CourseGraphAdminActionsTestCase(TestCase): + """ + Test CourseGraph Django admin actions. + """ + + @classmethod + def setUpTestData(cls): + """ + Make course overviews with varying modification dates. + """ + super().setUpTestData() + cls.course_updated_in_april = CourseOverviewFactory(run='april_update') + cls.course_updated_in_june = CourseOverviewFactory(run='june_update') + cls.course_updated_in_july = CourseOverviewFactory(run='july_update') + cls.course_updated_in_august = CourseOverviewFactory(run='august_update') + + # For each course overview, make an arbitrary update and then save() + # so that its `.modified` date is set. + with freeze_time("2020-04-01"): + cls.course_updated_in_april.marketing_url = "https://example.org" + cls.course_updated_in_april.save() + with freeze_time("2020-06-01"): + cls.course_updated_in_june.marketing_url = "https://example.org" + cls.course_updated_in_june.save() + with freeze_time("2020-07-01"): + cls.course_updated_in_july.marketing_url = "https://example.org" + cls.course_updated_in_july.save() + with freeze_time("2020-08-01"): + cls.course_updated_in_august.marketing_url = "https://example.org" + cls.course_updated_in_august.save() + + @_configure_coursegraph_connection + @_pretend_last_course_dump_was_may_2020 + @_patch_neo4j_graph + @_patch_apply_dump_task + @_patch_log_exception + def test_dump_courses(self, mock_log_exception, mock_apply_dump_task, mock_neo4j_graph): + """ + Test that dump_courses admin action dumps requested courses iff they have + been modified since the last dump to coursegraph. + """ + modeladmin_mock = mock.MagicMock() + + # Request all courses except the August-updated one + requested_course_keys = { + str(self.course_updated_in_april.id), + str(self.course_updated_in_june.id), + str(self.course_updated_in_july.id), + } + admin.dump_courses( + modeladmin=modeladmin_mock, + request=mock.MagicMock(), + queryset=CourseOverview.objects.filter(id__in=requested_course_keys), + ) + + # User should have been messaged + assert modeladmin_mock.message_user.call_count == 1 + assert modeladmin_mock.message_user.call_args.args[1] == ( + "Enqueued dumps for 2 course(s). Skipped 1 unchanged course(s)." + ) + + # For enqueueing, graph should've been authenticated once, using configured settings. + assert mock_neo4j_graph.call_count == 1 + assert mock_neo4j_graph.call_args.args == () + assert mock_neo4j_graph.call_args.kwargs == _coursegraph_connection + + # No errors should've been logged. + assert mock_log_exception.call_count == 0 + + # April course should have been skipped because the command was last run in May. + # Dumps for June and July courses should have been enqueued. + assert mock_apply_dump_task.call_count == 2 + actual_dumped_course_keys = { + call_args.kwargs['kwargs']['course_key_string'] + for call_args in mock_apply_dump_task.call_args_list + } + expected_dumped_course_keys = { + str(self.course_updated_in_june.id), + str(self.course_updated_in_july.id), + } + assert actual_dumped_course_keys == expected_dumped_course_keys + + @_configure_coursegraph_connection + @_pretend_last_course_dump_was_may_2020 + @_patch_neo4j_graph + @_patch_apply_dump_task + @_patch_log_exception + def test_dump_courses_overriding_cache(self, mock_log_exception, mock_apply_dump_task, mock_neo4j_graph): + """ + Test that dump_coursese_overriding_cach admin action dumps requested courses + whether or not they been modified since the last dump to coursegraph. + """ + modeladmin_mock = mock.MagicMock() + + # Request all courses except the August-updated one + requested_course_keys = { + str(self.course_updated_in_april.id), + str(self.course_updated_in_june.id), + str(self.course_updated_in_july.id), + } + admin.dump_courses_overriding_cache( + modeladmin=modeladmin_mock, + request=mock.MagicMock(), + queryset=CourseOverview.objects.filter(id__in=requested_course_keys), + ) + + # User should have been messaged + assert modeladmin_mock.message_user.call_count == 1 + assert modeladmin_mock.message_user.call_args.args[1] == ( + "Enqueued dumps for 3 course(s)." + ) + + # For enqueueing, graph should've been authenticated once, using configured settings. + assert mock_neo4j_graph.call_count == 1 + assert mock_neo4j_graph.call_args.args == () + assert mock_neo4j_graph.call_args.kwargs == _coursegraph_connection + + # No errors should've been logged. + assert mock_log_exception.call_count == 0 + + # April, June, and July courses should have all been dumped. + assert mock_apply_dump_task.call_count == 3 + actual_dumped_course_keys = { + call_args.kwargs['kwargs']['course_key_string'] + for call_args in mock_apply_dump_task.call_args_list + } + expected_dumped_course_keys = { + str(self.course_updated_in_april.id), + str(self.course_updated_in_june.id), + str(self.course_updated_in_july.id), + } + assert actual_dumped_course_keys == expected_dumped_course_keys + + @_configure_coursegraph_connection + @_pretend_last_course_dump_was_may_2020 + @_make_neo4j_graph_raise + @_patch_apply_dump_task + @_patch_log_exception + def test_dump_courses_error(self, mock_log_exception, mock_apply_dump_task, mock_neo4j_graph): + """ + Test that the dump_courses admin action dumps messages the user if an error + occurs when trying to enqueue course dumps. + """ + modeladmin_mock = mock.MagicMock() + + # Request dump of all four courses. + admin.dump_courses( + modeladmin=modeladmin_mock, + request=mock.MagicMock(), + queryset=CourseOverview.objects.all() + ) + + # Admin user should have been messaged about failure. + assert modeladmin_mock.message_user.call_count == 1 + assert modeladmin_mock.message_user.call_args.args[1] == ( + "Error enqueueing dumps for 4 course(s): we failed to connect or something!" + ) + + # For enqueueing, graph should've been authenticated once, using configured settings. + assert mock_neo4j_graph.call_count == 1 + assert mock_neo4j_graph.call_args.args == () + assert mock_neo4j_graph.call_args.kwargs == _coursegraph_connection + + # Exception should have been logged. + assert mock_log_exception.call_count == 1 + assert "Failed to enqueue" in mock_log_exception.call_args.args[0] + + # No courses should have been dumped. + assert mock_apply_dump_task.call_count == 0 From c9d31c6de107e186459dc663ad28296d2ab741e5 Mon Sep 17 00:00:00 2001 From: Kyle McCormick Date: Mon, 28 Mar 2022 14:45:00 -0400 Subject: [PATCH 7/7] docs: update CourseGraph README, notably w.r.t. new Tutor plugin Update the README of the CMS's CourseGraph support app: * Point to the newly-developed CourseGraph plugin for Tutor, and remove some prose that's now redundant with the Tutor plugin's README. * Add a link to the now-public CourseGraph Queries wiki page. * Capitalize the G in CourseGraph. * Fix a couple misc. formatting things. --- cms/djangoapps/coursegraph/README.rst | 49 +++++++++++++++++++-------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/cms/djangoapps/coursegraph/README.rst b/cms/djangoapps/coursegraph/README.rst index 0f9089ea42f2..18f0f60bdd33 100644 --- a/cms/djangoapps/coursegraph/README.rst +++ b/cms/djangoapps/coursegraph/README.rst @@ -1,37 +1,54 @@ -Coursegraph Support +CourseGraph Support ------------------- -This app exists to write data to "Coursegraph", a tool enabling Open edX developers and support specialists to inspect their platform instance's learning content. Coursegraph itself is simply an instance of Neo4j, which is an open-source graph database with a web interface. +This app exists to write data to "CourseGraph", a tool enabling Open edX developers and support specialists to inspect their platform instance's learning content. CourseGraph itself is simply an instance of `Neo4j`_, which is an open-source graph database with a Web interface. + +.. _Neo4j: https://neo4j.com Deploying Coursegraph ===================== -As of the Maple Open edX release, Coursegraph is *not* automatically provisioned by the community installation, and is *not* considered a "supported" part of the platform. However, operators may find the `neo4j Ansible playbook`_ useful as a starting point for deploying their own Coursegraph instance. Alternatively, Neo4j also maintains an official `Docker image`_. +There are two ways to deploy CourseGraph: + +* For operators using Tutor, there is a `CourseGraph plugin for Tutor`_ that is currently released as "Beta". Nutmeg is the earliest Open edX release that the plugin will work alongside. + +* For operators still using the old Ansible installation pathway, there exists a `neo4j Ansible playbook`_. Be warned that this method is not well-documented nor officially supported. -In order for Coursegraph to have queryable data, learning content from LMS must be written to Coursegraph using the ``dump_to_neo4j`` management command included in this app. In order for the data to stay up to date, it must be periodically refreshed, either manually or via an automation server such as Jenkins. +In order for CourseGraph to have queryable, up-to-date data, learning content from CMS must be written to CourseGraph regularly. That is where this Django app comes into play. For details on the various ways to write CMS data to CourseGraph, visit the `operations section of the CourseGraph Tutor plugin docs`_. -**Please note**: Access to a populated Coursegraph instance confers access to all the learning content in the related Open edX LMS/CMS. The basic authentication provided by Neo4j may or may not be sufficient for your security needs. Consider taking additional security measures, such as restricting Coursegraph access to only users on a private VPN. +**Please note**: Access to a populated CourseGraph instance confers access to all the learning content in the associated Open edX CMS (Studio). The basic authentication provided by Neo4j may or may not be sufficient for your security needs. Consider taking additional security measures, such as restricting CourseGraph access to only users on a private VPN. .. _neo4j Ansible playbook: https://github.com/edx/configuration/blob/master/playbooks/neo4j.yml -.. _Docker image: https://neo4j.com/developer/docker-run-neo4j/ +.. _CourseGraph plugin for Tutor: https://github.com/openedx/tutor-contrib-coursegraph/ + +.. _operations section of the CourseGraph Tutor plugin docs: https://github.com/openedx/tutor-contrib-coursegraph/#managing-data + +Running CourseGraph locally +=========================== +In some circumstances, you may want to run CourseGraph locally, connected to a development-mode Open edX instance. You can do this in both Tutor and Devstack. -Coursegraph in Devstack -======================= +Tutor +***** -Coursegraph is included as an "extra" component in the `Open edX Devstack`_. That is, it is not run or provisioned by default, but can be enabled on-demand. +The `CourseGraph plugin for Tutor`_ makes it easy to install, configure, and run CourseGraph for local development. -To provision Devstack Coursegraph with data from Devstack LMS, run:: +Devstack +******** + +CourseGraph is included as an "extra" component in the `Open edX Devstack`_. That is, it is not run or provisioned by default, but can be enabled on-demand. + +To provision Devstack CourseGraph with data from Devstack LMS, run:: make dev.provision.coursegraph -Coursegraph should now be accessible at http://localhost:7474 with the username ``neo4j`` and the password ``edx``. +CourseGraph should now be accessible at http://localhost:7474 with the username ``neo4j`` and the password ``edx``. -Under the hood, the provisioning command just invokes ``dump_to_neo4j`` on your LMS, pointed at your Coursegraph. The provisioning command can be run again at any point in the future to refresh Coursegraph with new LMS data. The data in Coursegraph will persist unless you explicitly destroy it (as noted below). +Under the hood, the provisioning command just invokes ``dump_to_neo4j`` on your LMS, pointed at your CourseGraph. The provisioning command can be run again at any point in the future to refresh CourseGraph with new LMS data. The data in CourseGraph will persist unless you explicitly destroy it (as noted below). -Other Devstack Coursegraph commands include:: +Other Devstack CourseGraph commands include:: make dev.up.coursegraph # Bring up the container (without re-provisioning). make dev.down.coursegraph # Stop and remove the container. @@ -48,7 +65,7 @@ The above commands should be run in your ``devstack`` folder, and they assume th Querying Coursegraph ==================== -Coursegraph is queryable using the `Cypher`_ query language. Open edX learning content is represented in Neo4j using a straightforward scheme: +CourseGraph is queryable using the `Cypher`_ query language. Open edX learning content is represented in Neo4j using a straightforward scheme: * A node is an XBlock usage. @@ -97,3 +114,7 @@ In a given course, which units contain problems with custom Python grading code? c.course_key = '' RETURN u.location + +You can see many more examples of useful CourseGraph queries on the `query archive wiki page`_. + +.. _query archive wiki page: https://openedx.atlassian.net/wiki/spaces/COMM/pages/3273228388/Useful+CourseGraph+Queries