From 986729bed76624436a2debb7d6f393dd87fca4af Mon Sep 17 00:00:00 2001 From: Brian Mesick Date: Tue, 2 May 2023 17:46:54 -0400 Subject: [PATCH] feat: Add event listener for course publish Creates the edx-platform plugin plumbing, adds some new requirements, maps the appropriate Django Signal to push course structure to ClickHouse. --- .../sinks/course_published.py | 23 ++++-- requirements/base.txt | 2 +- requirements/ci.txt | 4 +- requirements/dev.txt | 22 ++++-- requirements/doc.txt | 17 ++++- requirements/pip.txt | 2 +- requirements/quality.txt | 20 +++-- requirements/test.in | 1 + requirements/test.txt | 17 ++++- test_utils/helpers.py | 59 ++++++++++++++- tests/test_course_published.py | 73 ++++++++++++++++++- 11 files changed, 204 insertions(+), 36 deletions(-) diff --git a/event_sink_clickhouse/sinks/course_published.py b/event_sink_clickhouse/sinks/course_published.py index a62ee47..92c7567 100644 --- a/event_sink_clickhouse/sinks/course_published.py +++ b/event_sink_clickhouse/sinks/course_published.py @@ -25,19 +25,30 @@ class CoursePublishedSink(BaseSink): """ @staticmethod def _get_detached_xblock_types(): + """ + Import and return DETACHED_XBLOCK_TYPES. + Placed here to avoid model import at startup and to facilitate mocking them in testing. + """ # pylint: disable=import-outside-toplevel,import-error from xmodule.modulestore.store_utilities import DETACHED_XBLOCK_TYPES return DETACHED_XBLOCK_TYPES @staticmethod def _get_modulestore(): - # Import is placed here to avoid model import at project startup. - from xmodule.modulestore.django import modulestore # pylint: disable=import-outside-toplevel,import-error + """ + Import and return modulestore. + Placed here to avoid model import at startup and to facilitate mocking them in testing. + """ + # pylint: disable=import-outside-toplevel,import-error + from xmodule.modulestore.django import modulestore return modulestore() @staticmethod def _get_course_overview_model(): - # Import is placed here to avoid model import at project startup. + """ + Import and return CourseOverview. + Placed here to avoid model import at startup and to facilitate mocking them in testing. + """ # pylint: disable=import-outside-toplevel,import-error from openedx.core.djangoapps.content.course_overviews.models import CourseOverview return CourseOverview @@ -55,7 +66,7 @@ def strip_branch_and_version(location): @staticmethod def get_course_last_published(course_key): """ - Approximately when was a course last published? + Get approximate last publish date for the given course. We use the 'modified' column in the CourseOverview table as a quick and easy (although perhaps inexact) way of determining when a course was last @@ -72,8 +83,6 @@ def get_course_last_published(course_key): """ CourseOverview = CoursePublishedSink._get_course_overview_model() approx_last_published = CourseOverview.get_from_id(course_key).modified - print(approx_last_published) - raise Exception("foo") return str(approx_last_published) @staticmethod @@ -209,7 +218,7 @@ def dump(self, course_key): self.log.info("Completed dumping %s to ClickHouse", course_key) - except Exception: # pylint: disable=broad-except + except Exception: self.log.exception( "Error trying to dump course %s to ClickHouse!", course_string diff --git a/requirements/base.txt b/requirements/base.txt index 8519e6c..6dc376e 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -67,7 +67,7 @@ pytz==2023.3 # via # celery # django -requests==2.28.2 +requests==2.29.0 # via -r requirements/base.in six==1.16.0 # via click-repl diff --git a/requirements/ci.txt b/requirements/ci.txt index 1347919..bed1d01 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -12,7 +12,7 @@ filelock==3.12.0 # virtualenv packaging==23.1 # via tox -platformdirs==3.2.0 +platformdirs==3.5.0 # via virtualenv pluggy==1.0.0 # via tox @@ -29,5 +29,5 @@ tox==3.28.0 # tox-battery tox-battery==0.6.1 # via -r requirements/ci.in -virtualenv==20.22.0 +virtualenv==20.23.0 # via tox diff --git a/requirements/dev.txt b/requirements/dev.txt index cb5c47c..bea09b2 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -74,7 +74,7 @@ code-annotations==1.3.0 # via # -r requirements/quality.txt # edx-lint -coverage[toml]==7.2.3 +coverage[toml]==7.2.5 # via # -r requirements/quality.txt # pytest-cov @@ -173,7 +173,7 @@ pbr==5.11.1 # stevedore pip-tools==6.13.0 # via -r requirements/pip-tools.txt -platformdirs==3.2.0 +platformdirs==3.5.0 # via # -r requirements/ci.txt # -r requirements/quality.txt @@ -210,7 +210,7 @@ pydocstyle==6.3.0 # via -r requirements/quality.txt pygments==2.15.1 # via diff-cover -pylint==2.17.2 +pylint==2.17.3 # via # -r requirements/quality.txt # edx-lint @@ -265,7 +265,12 @@ pyyaml==6.0 # -r requirements/quality.txt # code-annotations # edx-i18n-tools -requests==2.28.2 + # responses +requests==2.29.0 + # via + # -r requirements/quality.txt + # responses +responses==0.23.1 # via -r requirements/quality.txt six==1.16.0 # via @@ -303,7 +308,7 @@ tomli==2.0.1 # pyproject-hooks # pytest # tox -tomlkit==0.11.7 +tomlkit==0.11.8 # via # -r requirements/quality.txt # pylint @@ -314,6 +319,10 @@ tox==3.28.0 # tox-battery tox-battery==0.6.1 # via -r requirements/ci.txt +types-pyyaml==6.0.12.9 + # via + # -r requirements/quality.txt + # responses typing-extensions==4.5.0 # via # -r requirements/quality.txt @@ -323,13 +332,14 @@ urllib3==1.26.15 # via # -r requirements/quality.txt # requests + # responses vine==5.0.0 # via # -r requirements/quality.txt # amqp # celery # kombu -virtualenv==20.22.0 +virtualenv==20.23.0 # via # -r requirements/ci.txt # tox diff --git a/requirements/doc.txt b/requirements/doc.txt index ceb4c4c..23fc8d3 100644 --- a/requirements/doc.txt +++ b/requirements/doc.txt @@ -67,7 +67,7 @@ click-repl==0.2.0 # celery code-annotations==1.3.0 # via -r requirements/test.txt -coverage[toml]==7.2.3 +coverage[toml]==7.2.5 # via # -r requirements/test.txt # pytest-cov @@ -218,21 +218,25 @@ pyyaml==6.0 # via # -r requirements/test.txt # code-annotations + # responses readme-renderer==37.3 # via twine -requests==2.28.2 +requests==2.29.0 # via # -r requirements/test.txt # requests-toolbelt + # responses # sphinx # twine -requests-toolbelt==0.10.1 +requests-toolbelt==1.0.0 # via twine +responses==0.23.1 + # via -r requirements/test.txt restructuredtext-lint==1.4.0 # via doc8 rfc3986==2.0.0 # via twine -rich==13.3.4 +rich==13.3.5 # via twine six==1.16.0 # via @@ -288,6 +292,10 @@ tomli==2.0.1 # pytest twine==4.0.2 # via -r requirements/doc.in +types-pyyaml==6.0.12.9 + # via + # -r requirements/test.txt + # responses typing-extensions==4.5.0 # via # pydata-sphinx-theme @@ -296,6 +304,7 @@ urllib3==1.26.15 # via # -r requirements/test.txt # requests + # responses # twine vine==5.0.0 # via diff --git a/requirements/pip.txt b/requirements/pip.txt index 4767414..e6827ba 100644 --- a/requirements/pip.txt +++ b/requirements/pip.txt @@ -8,7 +8,7 @@ wheel==0.40.0 # via -r requirements/pip.in # The following packages are considered to be unsafe in a requirements file: -pip==23.1.1 +pip==23.1.2 # via -r requirements/pip.in setuptools==67.7.2 # via -r requirements/pip.in diff --git a/requirements/quality.txt b/requirements/quality.txt index 3c01c8f..241350d 100644 --- a/requirements/quality.txt +++ b/requirements/quality.txt @@ -63,7 +63,7 @@ code-annotations==1.3.0 # via # -r requirements/test.txt # edx-lint -coverage[toml]==7.2.3 +coverage[toml]==7.2.5 # via # -r requirements/test.txt # pytest-cov @@ -133,7 +133,7 @@ pbr==5.11.1 # via # -r requirements/test.txt # stevedore -platformdirs==3.2.0 +platformdirs==3.5.0 # via pylint pluggy==1.0.0 # via @@ -155,7 +155,7 @@ pycparser==2.21 # cffi pydocstyle==6.3.0 # via -r requirements/quality.in -pylint==2.17.2 +pylint==2.17.3 # via # edx-lint # pylint-celery @@ -199,7 +199,12 @@ pyyaml==6.0 # via # -r requirements/test.txt # code-annotations -requests==2.28.2 + # responses +requests==2.29.0 + # via + # -r requirements/test.txt + # responses +responses==0.23.1 # via -r requirements/test.txt six==1.16.0 # via @@ -228,8 +233,12 @@ tomli==2.0.1 # coverage # pylint # pytest -tomlkit==0.11.7 +tomlkit==0.11.8 # via pylint +types-pyyaml==6.0.12.9 + # via + # -r requirements/test.txt + # responses typing-extensions==4.5.0 # via # astroid @@ -238,6 +247,7 @@ urllib3==1.26.15 # via # -r requirements/test.txt # requests + # responses vine==5.0.0 # via # -r requirements/test.txt diff --git a/requirements/test.in b/requirements/test.in index 6797160..bb78ea0 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -6,3 +6,4 @@ pytest-cov # pytest extension for code coverage statistics pytest-django # pytest extension for better Django support code-annotations # provides commands used by the pii_check make target. +responses # mocks for the requests library diff --git a/requirements/test.txt b/requirements/test.txt index 6ab39ca..dd30475 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -53,7 +53,7 @@ click-repl==0.2.0 # celery code-annotations==1.3.0 # via -r requirements/test.in -coverage[toml]==7.2.3 +coverage[toml]==7.2.5 # via pytest-cov # via # -c https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt @@ -136,9 +136,15 @@ pytz==2023.3 # celery # django pyyaml==6.0 - # via code-annotations -requests==2.28.2 - # via -r requirements/base.txt + # via + # code-annotations + # responses +requests==2.29.0 + # via + # -r requirements/base.txt + # responses +responses==0.23.1 + # via -r requirements/test.in six==1.16.0 # via # -r requirements/base.txt @@ -159,10 +165,13 @@ tomli==2.0.1 # via # coverage # pytest +types-pyyaml==6.0.12.9 + # via responses urllib3==1.26.15 # via # -r requirements/base.txt # requests + # responses vine==5.0.0 # via # -r requirements/base.txt diff --git a/test_utils/helpers.py b/test_utils/helpers.py index 6c02a7d..b1de1fd 100644 --- a/test_utils/helpers.py +++ b/test_utils/helpers.py @@ -1,3 +1,7 @@ +""" +Helper functions for tests +""" + import csv import random import string @@ -17,6 +21,9 @@ class FakeXBlock: + """ + Fakes the parameters of an XBlock that we care about. + """ def __init__(self, identifier, detached_block=False): self.block_type = "course_info" if detached_block else "vertical" self.scope_ids = Mock() @@ -28,37 +35,57 @@ def __init__(self, identifier, detached_block=False): self.children = [] def get_children(self): + """ + Fakes the method of the same name from an XBlock. + """ return self.children def course_str_factory(): + """ + Return a valid course key string. + """ course_str = f"course-v1:{ORG}+{COURSE}+{COURSE_RUN}" return course_str def course_key_factory(): + """ + Return a CourseKey object from our course key string. + """ return CourseKey.from_string(course_str_factory()) def block_usage_locator_factory(): + """ + Create a BlockUsageLocator with a random id. + """ block_id = ''.join(random.choices(string.ascii_letters, k=10)) return BlockUsageLocator(course_key_factory(), block_type="category", block_id=block_id, deprecated=True) def mock_course_overview(): + """ + Create a fake CourseOverview object that supports just the things we care about. + """ mock_overview = MagicMock() mock_overview.get_from_id = MagicMock() - mock_overview.get_from_id.return_value = datetime.now() - # CourseOverview.get_from_id(course_key).modified + mock_overview.get_from_id.return_value.modified = datetime.now() return mock_overview def mock_detached_xblock_types(): + """ + Mock the return results of xmodule.modulestore.store_utilities.DETACHED_XBLOCK_TYPES + """ # Current values as of 2023-05-01 return {'static_tab', 'about', 'course_info'} def get_clickhouse_http_params(): + """ + Get the params used in ClickHouse queries. + """ blocks_params = { "input_format_allow_errors_num": 1, "input_format_allow_errors_ratio": 0.1, @@ -74,19 +101,26 @@ def get_clickhouse_http_params(): def course_factory(): + """ + Return a fake course structure that exercises most of the serialization features. + """ + # Create a base block top_block = FakeXBlock("top") course = [top_block, ] + # Create a few children for i in range(3): block = FakeXBlock(f"Child {i}") course.append(block) top_block.children.append(block) + # Create grandchildren on some children if i > 0: sub_block = FakeXBlock(f"Grandchild {i}") course.append(sub_block) block.children.append(sub_block) + # Create some detached blocks at the top level for i in range(3): course.append(FakeXBlock(f"Detached {i}", detached_block=True)) @@ -94,10 +128,17 @@ def course_factory(): def check_block_csv_matcher(course): + """ + Match the course structure CSV against the test course. + + This is a matcher for the "responses" library. It returns a function + that actually does the matching. + """ def match(request): body = request.body lines = body.split("\n")[:-1] + # There should be one CSV line for each block in the test course if len(lines) != len(course): return False, f"Body has {len(lines)} lines, course has {len(course)}" @@ -106,6 +147,8 @@ def match(request): i = 0 try: + # The CSV should be in the same order as our course, make sure + # everything matches for row in reader: block = course[i] assert row[0] == block.location.org @@ -123,10 +166,17 @@ def match(request): def check_relationship_csv_matcher(course): + """ + Match the relationship CSV against the test course. + + This is a matcher for the "responses" library. It returns a function + that actually does the matching. + """ + # Build our own copy of the test relationships first relationships = [] for block in course: course_key = str(block.location.course_key) - for index, child in enumerate(block.get_children()): + for _, child in enumerate(block.get_children()): parent_node = str(CoursePublishedSink.strip_branch_and_version(block.location)) child_node = str(CoursePublishedSink.strip_branch_and_version(child.location)) relationships.append((course_key, parent_node, child_node)) @@ -135,6 +185,7 @@ def match(request): body = request.body lines = body.split("\n")[:-1] + # The relationships CSV should have the same number of relationships as our test if len(lines) != len(relationships): return False, f"Body has {len(lines)} lines but there are {len(relationships)} relationships" @@ -143,6 +194,8 @@ def match(request): i = 0 try: + # The CSV should be in the same order as our relationships, make sure + # everything matches for row in reader: print(row) print(relationships[i]) diff --git a/tests/test_course_published.py b/tests/test_course_published.py index 834c00f..e7bc4c5 100644 --- a/tests/test_course_published.py +++ b/tests/test_course_published.py @@ -1,31 +1,48 @@ +""" +Tests for the course_published sinks. +""" import logging -from io import StringIO +from datetime import datetime from unittest.mock import patch +import pytest import responses from responses import matchers from responses.registries import OrderedRegistry +from event_sink_clickhouse.sinks.course_published import CoursePublishedSink +from event_sink_clickhouse.tasks import dump_course_to_clickhouse from test_utils.helpers import ( check_block_csv_matcher, check_relationship_csv_matcher, course_factory, course_str_factory, get_clickhouse_http_params, + mock_course_overview, mock_detached_xblock_types, ) -from event_sink_clickhouse.tasks import dump_course_to_clickhouse -@responses.activate(registry=OrderedRegistry) +@responses.activate(registry=OrderedRegistry) # pylint: disable=unexpected-keyword-arg,no-value-for-parameter @patch("event_sink_clickhouse.sinks.course_published.CoursePublishedSink._get_detached_xblock_types") @patch("event_sink_clickhouse.sinks.course_published.CoursePublishedSink._get_modulestore") def test_course_publish_success(mock_modulestore, mock_detached, caplog): + """ + Test of a successful end-to-end run. + """ + # Necessary to get logs from the task caplog.set_level(logging.INFO, logger="edx.celery.task") + + # Create a fake course structure with a few fake XBlocks course = course_factory() mock_modulestore.return_value.get_items.return_value = course + + # Fake the "detached types" list since we can't import it here mock_detached.return_value = mock_detached_xblock_types() + # Use the responses library to catch the POSTs to ClickHouse + # and match them against the expected values, including CSV + # content blocks_params, relationships_params = get_clickhouse_http_params() responses.post( @@ -46,5 +63,55 @@ def test_course_publish_success(mock_modulestore, mock_detached, caplog): course = course_str_factory() dump_course_to_clickhouse(course) + # Just to make sure we're not calling things more than we need to assert mock_modulestore.call_count == 1 assert mock_detached.call_count == 1 + + +@responses.activate(registry=OrderedRegistry) # pylint: disable=unexpected-keyword-arg,no-value-for-parameter +@patch("event_sink_clickhouse.sinks.course_published.CoursePublishedSink._get_detached_xblock_types") +@patch("event_sink_clickhouse.sinks.course_published.CoursePublishedSink._get_modulestore") +def test_course_publish_clickhouse_error(mock_modulestore, mock_detached, caplog): + """ + Test the case where a ClickHouse POST fails. + """ + caplog.set_level(logging.INFO, logger="edx.celery.task") + course = course_factory() + mock_modulestore.return_value.get_items.return_value = course + mock_detached.return_value = mock_detached_xblock_types() + + # This will raise an exception when we try to post to ClickHouse + responses.post( + "https://foo.bar/", + body=Exception("Bogus test exception in ClickHouse call") + ) + + course = course_str_factory() + + with pytest.raises(Exception): + dump_course_to_clickhouse(course) + + # Make sure everything was called as we expect + assert mock_modulestore.call_count == 1 + assert mock_detached.call_count == 1 + + # Make sure our log message went through. + assert f"Error trying to dump course {course} to ClickHouse!" in caplog.text + + +@patch("event_sink_clickhouse.sinks.course_published.CoursePublishedSink._get_course_overview_model") +def test_get_course_last_published(mock_overview): + """ + This function isn't in use yet, but we'll need it for the management command + """ + # Create a fake course overview, which will return a datetime object + course = mock_course_overview() + mock_overview.return_value = course + + # Request our course last published date + course_key = course_str_factory() + + # Confirm that the string date we get back is a valid date + last_published_date = CoursePublishedSink.get_course_last_published(course_key) + dt = datetime.strptime(last_published_date, "%Y-%m-%d %H:%M:%S.%f") + assert dt