diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 12c711d..83c2841 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -14,7 +14,9 @@ Change Log Unreleased ********** -* +* First functional version, includes a CMS listener for COURSE_PUBLISHED +* README updates +* New configuration settings for connection to ClickHouse 0.1.0 – 2023-04-24 ********************************************** diff --git a/README.rst b/README.rst index 3ad6808..9172454 100644 --- a/README.rst +++ b/README.rst @@ -7,17 +7,25 @@ Event Sink ClickHouse Purpose ******* -A listener for `Open edX events`_ to send them to ClickHouse. This project -acts as a plugin to the Edx Platform, listens for configured Open edX events, -and sends them to a ClickHouse database for analytics or other processing. This -is being maintained as part of the Open Analytics Reference System (OARS) -project. +This project acts as a plugin to the `Edx Platform`_, listens for +configured `Open edX events`_, and sends them to a `ClickHouse`_ database for +analytics or other processing. This is being maintained as part of the Open +Analytics Reference System (`OARS`_) project. OARS consumes the data sent to ClickHouse by this plugin as part of data enrichment for reporting, or capturing data that otherwise does not fit in xAPI. +Currently the only sink is in the CMS. It listens for the ``COURSE_PUBLISHED`` +signal and serializes a subset of the published course blocks into one table +and the relationships between blocks into another table. With those we are +able to recreate the "graph" of the course and get relevant data, such as +block names, for reporting. + .. _Open edX events: https://github.com/openedx/openedx-events +.. _Edx Platform: https://github.com/openedx/edx-platform +.. _ClickHouse: https://clickhouse.com +.. _OARS: https://docs.openedx.org/projects/openedx-oars/en/latest/index.html Getting Started *************** @@ -75,12 +83,38 @@ Every time you develop something in this repo Deploying ========= -TODO: How can a new user go about deploying this component? Is it just a few -commands? Is there a larger how-to that should be linked here? +This plugin will be deployed by default in an OARS Tutor environment. For other +deployments install the library or add it to private requirements of your +virtual environment ( ``requirements/private.txt`` ). + +#. Run ``pip install openedx-event-sink-clickhouse``. + +#. Run migrations: + +- ``python manage.py lms migrate`` + +- ``python manage.py cms migrate`` -PLACEHOLDER: For details on how to deploy this component, see the `deployment how-to`_ +#. Restart LMS service and celery workers of edx-platform. + +Configuration +=============== + +Currently all events will be listened to by default (there is only one). So +the only necessary configuration is a ClickHouse connection: + +.. code-block:: -.. _deployment how-to: https://docs.openedx.org/projects/openedx-event-sink-clickhouse/how-tos/how-to-deploy-this-component.html + EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG = { + # URL to a running ClickHouse server's HTTP interface. ex: https://foo.openedx.org:8443/ or + # http://foo.openedx.org:8123/ . Note that we only support the ClickHouse HTTP interface + # to avoid pulling in more dependencies to the platform than necessary. + "url": "http://clickhouse:8123", + "username": "changeme", + "password": "changeme", + "database": "event_sink", + "timeout_secs": 3, + } Getting Help ************ diff --git a/catalog-info.yaml b/catalog-info.yaml index 6fb2ae8..28736d3 100644 --- a/catalog-info.yaml +++ b/catalog-info.yaml @@ -2,9 +2,9 @@ # https://open-edx-proposals.readthedocs.io/en/latest/processes/oep-0055-proc-project-maintainers.html apiVersion: backstage.io/v1alpha1 -kind: "" +kind: "Component" metadata: - name: 'openedx_event_sink_clickhouse' + name: 'openedx-event-sink-clickhouse' description: "A sink for Open edX events to send them to ClickHouse" annotations: # (Optional) Annotation keys and values can be whatever you want. @@ -15,18 +15,10 @@ metadata: spec: # (Required) This can be a group(`group:` or a user(`user:`) - owner: "" + owner: "group:openedx-event-sink-clickhouse-maintainers" # (Required) Acceptable Type Values: service, website, library - type: '' + type: 'library' # (Required) Acceptable Lifecycle Values: experimental, production, deprecated lifecycle: 'experimental' - - # (Optional) The value can be the name of any known component. - subcomponentOf: '' - - # (Optional) An array of different components or resources. - dependsOn: - - '' - - '' diff --git a/event_sink_clickhouse/apps.py b/event_sink_clickhouse/apps.py index 4981b8f..85c287e 100644 --- a/event_sink_clickhouse/apps.py +++ b/event_sink_clickhouse/apps.py @@ -3,6 +3,7 @@ """ from django.apps import AppConfig +from edx_django_utils.plugins import PluginSettings, PluginSignals class EventSinkClickhouseConfig(AppConfig): @@ -11,3 +12,39 @@ class EventSinkClickhouseConfig(AppConfig): """ name = 'event_sink_clickhouse' + verbose_name = "Event Sink ClickHouse" + + plugin_app = { + PluginSettings.CONFIG: { + 'lms.djangoapp': { + 'production': {PluginSettings.RELATIVE_PATH: 'settings.production'}, + 'common': {PluginSettings.RELATIVE_PATH: 'settings.common'}, + }, + 'cms.djangoapp': { + 'production': {PluginSettings.RELATIVE_PATH: 'settings.production'}, + 'common': {PluginSettings.RELATIVE_PATH: 'settings.common'}, + } + }, + # Configuration setting for Plugin Signals for this app. + PluginSignals.CONFIG: { + # Configure the Plugin Signals for each Project Type, as needed. + 'cms.djangoapp': { + # List of all plugin Signal receivers for this app and project type. + PluginSignals.RECEIVERS: [{ + # The name of the app's signal receiver function. + PluginSignals.RECEIVER_FUNC_NAME: 'receive_course_publish', + + # The full path to the module where the signal is defined. + PluginSignals.SIGNAL_PATH: 'xmodule.modulestore.django.COURSE_PUBLISHED', + }], + } + }, + } + + def ready(self): + """ + Import our Celery tasks for initialization. + """ + super().ready() + + from . import tasks # pylint: disable=import-outside-toplevel, unused-import diff --git a/event_sink_clickhouse/settings/__init__.py b/event_sink_clickhouse/settings/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/event_sink_clickhouse/settings/common.py b/event_sink_clickhouse/settings/common.py new file mode 100644 index 0000000..1b6d117 --- /dev/null +++ b/event_sink_clickhouse/settings/common.py @@ -0,0 +1,19 @@ +""" +Default settings for the openedx_event_sink_clickhouse app. +""" + + +def plugin_settings(settings): + """ + Adds default settings + """ + settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG = { + # URL to a running ClickHouse server's HTTP interface. ex: https://foo.openedx.org:8443/ or + # http://foo.openedx.org:8123/ . Note that we only support the ClickHouse HTTP interface + # to avoid pulling in more dependencies to the platform than necessary. + "url": "http://clickhouse:8123", + "username": "changeme", + "password": "changeme", + "database": "event_sink", + "timeout_secs": 3, + } diff --git a/event_sink_clickhouse/settings/production.py b/event_sink_clickhouse/settings/production.py new file mode 100644 index 0000000..18fee66 --- /dev/null +++ b/event_sink_clickhouse/settings/production.py @@ -0,0 +1,13 @@ +""" +Production settings for the openedx_event_sink_clickhouse app. +""" + + +def plugin_settings(settings): + """ + Override the default app settings with production settings. + """ + settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG = settings.ENV_TOKENS.get( + 'EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG', + settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG + ) diff --git a/event_sink_clickhouse/signals.py b/event_sink_clickhouse/signals.py new file mode 100644 index 0000000..f752120 --- /dev/null +++ b/event_sink_clickhouse/signals.py @@ -0,0 +1,13 @@ +""" +Signal handler functions, mapped to specific signals in apps.py. +""" + + +def receive_course_publish(sender, course_key, **kwargs): # pylint: disable=unused-argument + """ + Receives COURSE_PUBLISHED signal and queues the dump job. + """ + # import here, because signal is registered at startup, but items in tasks are not yet able to be loaded + from .tasks import dump_course_to_clickhouse # pylint: disable=import-outside-toplevel + + dump_course_to_clickhouse.delay(str(course_key)) diff --git a/event_sink_clickhouse/sinks/__init__.py b/event_sink_clickhouse/sinks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/event_sink_clickhouse/sinks/base_sink.py b/event_sink_clickhouse/sinks/base_sink.py new file mode 100644 index 0000000..6bcd40a --- /dev/null +++ b/event_sink_clickhouse/sinks/base_sink.py @@ -0,0 +1,47 @@ +""" +Base classes for event sinks +""" +from collections import namedtuple + +import requests +from django.conf import settings + +ClickHouseAuth = namedtuple("ClickHouseAuth", ["username", "password"]) + + +class BaseSink: + """ + Base class for ClickHouse event sink, allows overwriting of default settings + """ + def __init__(self, connection_overrides, log): + self.log = log + self.ch_url = settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG["url"] + self.ch_auth = ClickHouseAuth(settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG["username"], + settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG["password"]) + self.ch_database = settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG["database"] + self.ch_timeout_secs = settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG["timeout_secs"] + + # If any overrides to the ClickHouse connection + if connection_overrides: + self.ch_url = connection_overrides.get("url", self.ch_url) + self.ch_auth = ClickHouseAuth(connection_overrides.get("username", self.ch_auth.username), + connection_overrides.get("password", self.ch_auth.password)) + self.ch_database = connection_overrides.get("database", self.ch_database) + self.ch_timeout_secs = connection_overrides.get("timeout_secs", self.ch_timeout_secs) + + def _send_clickhouse_request(self, request): + """ + Perform the actual HTTP requests to ClickHouse. + """ + session = requests.Session() + prepared_request = request.prepare() + + try: + response = session.send(prepared_request, timeout=self.ch_timeout_secs) + response.raise_for_status() + except requests.exceptions.HTTPError as e: + self.log.error(str(e)) + self.log.error(e.response.headers) + self.log.error(e.response) + self.log.error(e.response.text) + raise diff --git a/event_sink_clickhouse/sinks/course_published.py b/event_sink_clickhouse/sinks/course_published.py new file mode 100644 index 0000000..5c01afd --- /dev/null +++ b/event_sink_clickhouse/sinks/course_published.py @@ -0,0 +1,250 @@ +""" +Handler for the CMS COURSE_PUBLISHED event + +Does the following: +- Pulls the course structure from modulestore +- Serialize the xblocks and their parent/child relationships +- Sends them to ClickHouse in CSV format + +Note that the serialization format does not include all fields as there may be things like +LTI passwords and other secrets. We just take the fields necessary for reporting at this time. +""" + +import csv +import io +import uuid + +import requests +from django.utils import timezone + +from event_sink_clickhouse.sinks.base_sink import BaseSink + +# Defaults we want to ensure we fail early on bulk inserts +CLICKHOUSE_BULK_INSERT_PARAMS = { + "input_format_allow_errors_num": 1, + "input_format_allow_errors_ratio": 0.1, +} + + +class CoursePublishedSink(BaseSink): + """ + Event sink for the COURSE_PUBLISHED signal + """ + @staticmethod + def _get_detached_xblock_types(): + """ + Import and return DETACHED_XBLOCK_TYPES. + Placed here to avoid model import at startup and to facilitate mocking them in testing. + """ + # pylint: disable=import-outside-toplevel,import-error + from xmodule.modulestore.store_utilities import DETACHED_XBLOCK_TYPES + return DETACHED_XBLOCK_TYPES + + @staticmethod + def _get_modulestore(): + """ + Import and return modulestore. + Placed here to avoid model import at startup and to facilitate mocking them in testing. + """ + # pylint: disable=import-outside-toplevel,import-error + from xmodule.modulestore.django import modulestore + return modulestore() + + @staticmethod + def _get_course_overview_model(): + """ + Import and return CourseOverview. + Placed here to avoid model import at startup and to facilitate mocking them in testing. + """ + # pylint: disable=import-outside-toplevel,import-error + from openedx.core.djangoapps.content.course_overviews.models import CourseOverview + return CourseOverview + + @staticmethod + def strip_branch_and_version(location): + """ + Removes the branch and version information from a location. + Args: + location: an xblock's location. + Returns: that xblock's location without branch and version information. + """ + return location.for_branch(None) + + @staticmethod + def get_course_last_published(course_key): + """ + Get approximate last publish date for the given course. + + We use the 'modified' column in the CourseOverview table as a quick and easy + (although perhaps inexact) way of determining when a course was last + published. This works because CourseOverview rows are re-written upon + course publish. + + Args: + course_key: a CourseKey + + Returns: The datetime the course was last published at, stringified. + Uses Python's default str(...) implementation for datetimes, which + is sortable and similar to ISO 8601: + https://docs.python.org/3/library/datetime.html#datetime.date.__str__ + """ + CourseOverview = CoursePublishedSink._get_course_overview_model() + approx_last_published = CourseOverview.get_from_id(course_key).modified + return str(approx_last_published) + + @staticmethod + def serialize_item(item, index, detached_xblock_types, dump_id, dump_timestamp): + """ + Args: + item: an XBlock + index: a number indicating where the item falls in the course hierarchy + + Returns: + fields: a *limited* dictionary of an XBlock's field names and values + block_type: the name of the XBlock's type (i.e. 'course' + or 'problem') + """ + course_key = item.scope_ids.usage_id.course_key + block_type = item.scope_ids.block_type + + serialized_block = { + 'org': course_key.org, + 'course_key': str(course_key), + 'course': course_key.course, + 'run': course_key.run, + 'location': str(item.location), + 'display_name': item.display_name_with_default.replace("'", "\'"), + 'block_type': block_type, + 'detached': 1 if block_type in detached_xblock_types else 0, + 'order': index, + 'edited_on': str(getattr(item, 'edited_on', '')), + 'dump_id': dump_id, + 'time_last_dumped': dump_timestamp, + } + + return serialized_block + + def serialize_course(self, course_id): + """ + Serializes a course into a CSV of nodes and relationships. + + Args: + course_id: CourseKey of the course we want to serialize + + Returns: + nodes: a list of dicts representing xblocks for the course + relationships: a list of dicts representing relationships between nodes + """ + modulestore = CoursePublishedSink._get_modulestore() + detached_xblock_types = CoursePublishedSink._get_detached_xblock_types() + + dump_id = str(uuid.uuid4()) + dump_timestamp = str(timezone.now()) + + # Create a location to node mapping as a lookup for writing relationships later + location_to_node = {} + items = modulestore.get_items(course_id) + + # Serialize the XBlocks to dicts and map them with their location as keys the + # whole map needs to be completed before we can define relationships + index = 0 + for item in items: + index += 1 + fields = self.serialize_item(item, index, detached_xblock_types, dump_id, dump_timestamp) + location_to_node[self.strip_branch_and_version(item.location)] = fields + + # Create a list of relationships between blocks, using their locations as identifiers + relationships = [] + for item in items: + for index, child in enumerate(item.get_children()): + parent_node = location_to_node.get(self.strip_branch_and_version(item.location)) + child_node = location_to_node.get(self.strip_branch_and_version(child.location)) + + if parent_node is not None and child_node is not None: + relationship = { + 'course_key': str(course_id), + 'parent_location': str(parent_node["location"]), + 'child_location': str(child_node["location"]), + 'order': index, + 'dump_id': dump_id, + 'time_last_dumped': dump_timestamp, + } + relationships.append(relationship) + + nodes = list(location_to_node.values()) + return nodes, relationships + + def _send_xblocks(self, serialized_xblocks): + """ + Create the insert query and CSV to send the serialized XBlocks to ClickHouse. + """ + params = CLICKHOUSE_BULK_INSERT_PARAMS.copy() + + # "query" is a special param for the query, it's the best way to get the FORMAT CSV in there. + params["query"] = f"INSERT INTO {self.ch_database}.course_blocks FORMAT CSV" + + output = io.StringIO() + writer = csv.writer(output, quoting=csv.QUOTE_NONNUMERIC) + + for node in serialized_xblocks: + writer.writerow(node.values()) + + request = requests.Request( + 'POST', + self.ch_url, + data=output.getvalue(), + params=params, + auth=self.ch_auth + ) + + self._send_clickhouse_request(request) + + def _send_relationships(self, relationships): + """ + Create the insert query and CSV to send the serialized relationships to ClickHouse. + """ + params = CLICKHOUSE_BULK_INSERT_PARAMS.copy() + + # "query" is a special param for the query, it's the best way to get the FORMAT CSV in there. + params["query"] = f"INSERT INTO {self.ch_database}.course_relationships FORMAT CSV" + output = io.StringIO() + writer = csv.writer(output, quoting=csv.QUOTE_NONNUMERIC) + + for relationship in relationships: + writer.writerow(relationship.values()) + + request = requests.Request( + 'POST', + self.ch_url, + data=output.getvalue(), + params=params, + auth=self.ch_auth + ) + + self._send_clickhouse_request(request) + + def dump(self, course_key): + """ + Do the serialization and send to ClickHouse + """ + serialized_blocks, relationships = self.serialize_course(course_key) + + self.log.info( + "Now dumping %s to ClickHouse: %d serialized_blocks and %d relationships", + course_key, + len(serialized_blocks), + len(relationships), + ) + + course_string = str(course_key) + + try: + self._send_xblocks(serialized_blocks) + self._send_relationships(relationships) + self.log.info("Completed dumping %s to ClickHouse", course_key) + except Exception: + self.log.exception( + "Error trying to dump course %s to ClickHouse!", + course_string + ) + raise diff --git a/event_sink_clickhouse/tasks.py b/event_sink_clickhouse/tasks.py new file mode 100644 index 0000000..9089bab --- /dev/null +++ b/event_sink_clickhouse/tasks.py @@ -0,0 +1,30 @@ +""" +This file contains a management command for exporting course modulestore data to ClickHouse. +""" + +import logging + +from celery import shared_task +from edx_django_utils.monitoring import set_code_owner_attribute +from opaque_keys.edx.keys import CourseKey + +from event_sink_clickhouse.sinks.course_published import CoursePublishedSink + +log = logging.getLogger(__name__) +celery_log = logging.getLogger('edx.celery.task') + + +@shared_task +@set_code_owner_attribute +def dump_course_to_clickhouse(course_key_string, connection_overrides=None): + """ + Serialize a course and writes it to ClickHouse. + + Arguments: + course_key_string: course key for the course to be exported + connection_overrides (dict): overrides to ClickHouse connection + parameters specified in `settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG`. + """ + course_key = CourseKey.from_string(course_key_string) + sink = CoursePublishedSink(connection_overrides=connection_overrides, log=celery_log) + sink.dump(course_key) diff --git a/pii_report/2023-24-04-10-29-05.yaml b/pii_report/2023-24-04-10-29-05.yaml deleted file mode 100644 index e9c88aa..0000000 --- a/pii_report/2023-24-04-10-29-05.yaml +++ /dev/null @@ -1,79 +0,0 @@ -.annotation_safe_list.yml: -- annotation_data: This model has no PII - annotation_token: '.. no_pii:' - extra: - full_comment: '{''.. no_pii:'': ''This model has no PII''}' - object_id: contenttypes.ContentType - filename: .annotation_safe_list.yml - found_by: safelist - line_number: 0 - report_group_id: 1 -- annotation_data: This model has no PII - annotation_token: '.. no_pii:' - extra: - full_comment: '{''.. no_pii:'': ''This model has no PII''}' - object_id: admin.LogEntry - filename: .annotation_safe_list.yml - found_by: safelist - line_number: 0 - report_group_id: 2 -- annotation_data: This model has no PII - annotation_token: '.. no_pii:' - extra: - full_comment: '{''.. no_pii:'': ''This model has no PII''}' - object_id: auth.Permission - filename: .annotation_safe_list.yml - found_by: safelist - line_number: 0 - report_group_id: 3 -- annotation_data: This model has no PII - annotation_token: '.. no_pii:' - extra: - full_comment: '{''.. no_pii:'': ''This model has no PII''}' - object_id: sessions.Session - filename: .annotation_safe_list.yml - found_by: safelist - line_number: 0 - report_group_id: 4 -- annotation_data: This model minimally contains a username, password, and email - annotation_token: .. pii - extra: - full_comment: '{''.. pii'': ''This model minimally contains a username, password, - and email'', ''.. pii_types'': ''username, email_address, password'', ''.. pii_retirement'': - ''consumer_api''}' - object_id: auth.User - filename: .annotation_safe_list.yml - found_by: safelist - line_number: 0 - report_group_id: 5 -- annotation_data: username, email_address, password - annotation_token: .. pii_types - extra: - full_comment: '{''.. pii'': ''This model minimally contains a username, password, - and email'', ''.. pii_types'': ''username, email_address, password'', ''.. pii_retirement'': - ''consumer_api''}' - object_id: auth.User - filename: .annotation_safe_list.yml - found_by: safelist - line_number: 0 - report_group_id: 5 -- annotation_data: consumer_api - annotation_token: .. pii_retirement - extra: - full_comment: '{''.. pii'': ''This model minimally contains a username, password, - and email'', ''.. pii_types'': ''username, email_address, password'', ''.. pii_retirement'': - ''consumer_api''}' - object_id: auth.User - filename: .annotation_safe_list.yml - found_by: safelist - line_number: 0 - report_group_id: 5 -- annotation_data: This model has no PII - annotation_token: '.. no_pii:' - extra: - full_comment: '{''.. no_pii:'': ''This model has no PII''}' - object_id: auth.Group - filename: .annotation_safe_list.yml - found_by: safelist - line_number: 0 - report_group_id: 6 diff --git a/requirements/base.in b/requirements/base.in index a954780..c08ac0a 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -1,5 +1,8 @@ # Core requirements for using this application -c constraints.txt +celery # Asynchronous task execution library Django # Web application framework - +requests # HTTP request library +edx-django-utils # Django utilities, we use caching and monitoring +edx-opaque-keys # Parsing library for course and usage keys diff --git a/requirements/base.txt b/requirements/base.txt index a615544..9345f68 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -4,13 +4,85 @@ # # make upgrade # +amqp==5.1.1 + # via kombu asgiref==3.6.0 # via django -django==3.2.18 +billiard==3.6.4.0 + # via celery +celery==5.2.7 + # via -r requirements/base.in +certifi==2022.12.7 + # via requests +cffi==1.15.1 + # via pynacl +charset-normalizer==3.1.0 + # via requests +click==8.1.3 + # via + # celery + # click-didyoumean + # click-plugins + # click-repl + # edx-django-utils +click-didyoumean==0.3.0 + # via celery +click-plugins==1.1.1 + # via celery +click-repl==0.2.0 + # via celery +django==3.2.19 # via # -c https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt # -r requirements/base.in + # django-crum + # edx-django-utils +django-crum==0.7.9 + # via edx-django-utils +django-waffle==3.0.0 + # via edx-django-utils +edx-django-utils==5.4.0 + # via -r requirements/base.in +edx-opaque-keys==2.3.0 + # via -r requirements/base.in +idna==3.4 + # via requests +kombu==5.2.4 + # via celery +newrelic==8.8.0 + # via edx-django-utils +pbr==5.11.1 + # via stevedore +prompt-toolkit==3.0.38 + # via click-repl +psutil==5.9.5 + # via edx-django-utils +pycparser==2.21 + # via cffi +pymongo==3.13.0 + # via edx-opaque-keys +pynacl==1.5.0 + # via edx-django-utils pytz==2023.3 - # via django + # via + # celery + # django +requests==2.29.0 + # via -r requirements/base.in +six==1.16.0 + # via click-repl sqlparse==0.4.4 # via django +stevedore==5.0.0 + # via + # edx-django-utils + # edx-opaque-keys +urllib3==1.26.15 + # via requests +vine==5.0.0 + # via + # amqp + # celery + # kombu +wcwidth==0.2.6 + # via prompt-toolkit diff --git a/requirements/dev.txt b/requirements/dev.txt index 2392e4a..721236d 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -4,6 +4,10 @@ # # make upgrade # +amqp==5.1.1 + # via + # -r requirements/quality.txt + # kombu asgiref==3.6.0 # via # -r requirements/quality.txt @@ -13,24 +17,59 @@ astroid==2.15.4 # -r requirements/quality.txt # pylint # pylint-celery +billiard==3.6.4.0 + # via + # -r requirements/quality.txt + # celery build==0.10.0 # via # -r requirements/pip-tools.txt # pip-tools +celery==5.2.7 + # via -r requirements/quality.txt +certifi==2022.12.7 + # via + # -r requirements/quality.txt + # requests +cffi==1.15.1 + # via + # -r requirements/quality.txt + # pynacl chardet==5.1.0 # via diff-cover +charset-normalizer==3.1.0 + # via + # -r requirements/quality.txt + # requests click==8.1.3 # via # -r requirements/pip-tools.txt # -r requirements/quality.txt + # celery + # click-didyoumean # click-log + # click-plugins + # click-repl # code-annotations + # edx-django-utils # edx-lint # pip-tools +click-didyoumean==0.3.0 + # via + # -r requirements/quality.txt + # celery click-log==0.4.0 # via # -r requirements/quality.txt # edx-lint +click-plugins==1.1.1 + # via + # -r requirements/quality.txt + # celery +click-repl==0.2.0 + # via + # -r requirements/quality.txt + # celery code-annotations==1.3.0 # via # -r requirements/quality.txt @@ -49,15 +88,29 @@ distlib==0.3.6 # via # -r requirements/ci.txt # virtualenv -django==3.2.18 +django==3.2.19 # via # -c https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt # -r requirements/quality.txt + # django-crum + # edx-django-utils # edx-i18n-tools +django-crum==0.7.9 + # via + # -r requirements/quality.txt + # edx-django-utils +django-waffle==3.0.0 + # via + # -r requirements/quality.txt + # edx-django-utils +edx-django-utils==5.4.0 + # via -r requirements/quality.txt edx-i18n-tools==0.9.2 # via -r requirements/dev.in edx-lint==5.3.4 # via -r requirements/quality.txt +edx-opaque-keys==2.3.0 + # via -r requirements/quality.txt exceptiongroup==1.1.1 # via # -r requirements/quality.txt @@ -67,6 +120,10 @@ filelock==3.12.0 # -r requirements/ci.txt # tox # virtualenv +idna==3.4 + # via + # -r requirements/quality.txt + # requests iniconfig==2.0.0 # via # -r requirements/quality.txt @@ -80,6 +137,10 @@ jinja2==3.1.2 # -r requirements/quality.txt # code-annotations # diff-cover +kombu==5.2.4 + # via + # -r requirements/quality.txt + # celery lazy-object-proxy==1.9.0 # via # -r requirements/quality.txt @@ -92,6 +153,10 @@ mccabe==0.7.0 # via # -r requirements/quality.txt # pylint +newrelic==8.8.0 + # via + # -r requirements/quality.txt + # edx-django-utils packaging==23.1 # via # -r requirements/ci.txt @@ -123,12 +188,24 @@ pluggy==1.0.0 # tox polib==1.2.0 # via edx-i18n-tools +prompt-toolkit==3.0.38 + # via + # -r requirements/quality.txt + # click-repl +psutil==5.9.5 + # via + # -r requirements/quality.txt + # edx-django-utils py==1.11.0 # via # -r requirements/ci.txt # tox pycodestyle==2.10.0 # via -r requirements/quality.txt +pycparser==2.21 + # via + # -r requirements/quality.txt + # cffi pydocstyle==6.3.0 # via -r requirements/quality.txt pygments==2.15.1 @@ -153,6 +230,14 @@ pylint-plugin-utils==0.7 # -r requirements/quality.txt # pylint-celery # pylint-django +pymongo==3.13.0 + # via + # -r requirements/quality.txt + # edx-opaque-keys +pynacl==1.5.0 + # via + # -r requirements/quality.txt + # edx-django-utils pyproject-hooks==1.0.0 # via # -r requirements/pip-tools.txt @@ -173,16 +258,25 @@ python-slugify==8.0.1 pytz==2023.3 # via # -r requirements/quality.txt + # celery # django pyyaml==6.0 # via # -r requirements/quality.txt # code-annotations # edx-i18n-tools + # responses +requests==2.29.0 + # via + # -r requirements/quality.txt + # responses +responses==0.23.1 + # via -r requirements/quality.txt six==1.16.0 # via # -r requirements/ci.txt # -r requirements/quality.txt + # click-repl # edx-lint # tox snowballstemmer==2.2.0 @@ -197,6 +291,8 @@ stevedore==5.0.0 # via # -r requirements/quality.txt # code-annotations + # edx-django-utils + # edx-opaque-keys text-unidecode==1.3 # via # -r requirements/quality.txt @@ -223,15 +319,34 @@ tox==3.28.0 # tox-battery tox-battery==0.6.1 # via -r requirements/ci.txt +types-pyyaml==6.0.12.9 + # via + # -r requirements/quality.txt + # responses typing-extensions==4.5.0 # via # -r requirements/quality.txt # astroid # pylint +urllib3==1.26.15 + # via + # -r requirements/quality.txt + # requests + # responses +vine==5.0.0 + # via + # -r requirements/quality.txt + # amqp + # celery + # kombu virtualenv==20.23.0 # via # -r requirements/ci.txt # tox +wcwidth==0.2.6 + # via + # -r requirements/quality.txt + # prompt-toolkit wheel==0.40.0 # via # -r requirements/pip-tools.txt diff --git a/requirements/doc.txt b/requirements/doc.txt index 55ff9ea..c7d3a6a 100644 --- a/requirements/doc.txt +++ b/requirements/doc.txt @@ -8,6 +8,10 @@ accessible-pygments==0.0.4 # via pydata-sphinx-theme alabaster==0.7.13 # via sphinx +amqp==5.1.1 + # via + # -r requirements/test.txt + # kombu asgiref==3.6.0 # via # -r requirements/test.txt @@ -18,32 +22,69 @@ babel==2.12.1 # sphinx beautifulsoup4==4.12.2 # via pydata-sphinx-theme +billiard==3.6.4.0 + # via + # -r requirements/test.txt + # celery bleach==6.0.0 # via readme-renderer build==0.10.0 # via -r requirements/doc.in +celery==5.2.7 + # via -r requirements/test.txt certifi==2022.12.7 - # via requests + # via + # -r requirements/test.txt + # requests cffi==1.15.1 - # via cryptography + # via + # -r requirements/test.txt + # pynacl charset-normalizer==3.1.0 - # via requests + # via + # -r requirements/test.txt + # requests click==8.1.3 # via # -r requirements/test.txt + # celery + # click-didyoumean + # click-plugins + # click-repl # code-annotations + # edx-django-utils +click-didyoumean==0.3.0 + # via + # -r requirements/test.txt + # celery +click-plugins==1.1.1 + # via + # -r requirements/test.txt + # celery +click-repl==0.2.0 + # via + # -r requirements/test.txt + # celery code-annotations==1.3.0 # via -r requirements/test.txt coverage[toml]==7.2.5 # via # -r requirements/test.txt # pytest-cov -cryptography==40.0.2 - # via secretstorage -django==3.2.18 +django==3.2.19 # via # -c https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt # -r requirements/test.txt + # django-crum + # edx-django-utils +django-crum==0.7.9 + # via + # -r requirements/test.txt + # edx-django-utils +django-waffle==3.0.0 + # via + # -r requirements/test.txt + # edx-django-utils doc8==1.1.1 # via -r requirements/doc.in docutils==0.19 @@ -53,12 +94,18 @@ docutils==0.19 # readme-renderer # restructuredtext-lint # sphinx +edx-django-utils==5.4.0 + # via -r requirements/test.txt +edx-opaque-keys==2.3.0 + # via -r requirements/test.txt exceptiongroup==1.1.1 # via # -r requirements/test.txt # pytest idna==3.4 - # via requests + # via + # -r requirements/test.txt + # requests imagesize==1.4.1 # via sphinx importlib-metadata==6.6.0 @@ -74,10 +121,6 @@ iniconfig==2.0.0 # pytest jaraco-classes==3.2.3 # via keyring -jeepney==0.8.0 - # via - # keyring - # secretstorage jinja2==3.1.2 # via # -r requirements/test.txt @@ -85,6 +128,10 @@ jinja2==3.1.2 # sphinx keyring==23.13.1 # via twine +kombu==5.2.4 + # via + # -r requirements/test.txt + # celery markdown-it-py==2.2.0 # via rich markupsafe==2.1.2 @@ -95,6 +142,10 @@ mdurl==0.1.2 # via markdown-it-py more-itertools==9.1.0 # via jaraco-classes +newrelic==8.8.0 + # via + # -r requirements/test.txt + # edx-django-utils packaging==23.1 # via # -r requirements/test.txt @@ -112,8 +163,18 @@ pluggy==1.0.0 # via # -r requirements/test.txt # pytest +prompt-toolkit==3.0.38 + # via + # -r requirements/test.txt + # click-repl +psutil==5.9.5 + # via + # -r requirements/test.txt + # edx-django-utils pycparser==2.21 - # via cffi + # via + # -r requirements/test.txt + # cffi pydata-sphinx-theme==0.13.3 # via sphinx-book-theme pygments==2.15.1 @@ -124,6 +185,14 @@ pygments==2.15.1 # readme-renderer # rich # sphinx +pymongo==3.13.0 + # via + # -r requirements/test.txt + # edx-opaque-keys +pynacl==1.5.0 + # via + # -r requirements/test.txt + # edx-django-utils pyproject-hooks==1.0.0 # via build pytest==7.3.1 @@ -143,30 +212,37 @@ pytz==2023.3 # via # -r requirements/test.txt # babel + # celery # django pyyaml==6.0 # via # -r requirements/test.txt # code-annotations + # responses readme-renderer==37.3 # via twine requests==2.29.0 # via + # -r requirements/test.txt # requests-toolbelt + # responses # sphinx # twine -requests-toolbelt==0.10.1 +requests-toolbelt==1.0.0 # via twine +responses==0.23.1 + # via -r requirements/test.txt restructuredtext-lint==1.4.0 # via doc8 rfc3986==2.0.0 # via twine rich==13.3.5 # via twine -secretstorage==3.3.3 - # via keyring six==1.16.0 - # via bleach + # via + # -r requirements/test.txt + # bleach + # click-repl snowballstemmer==2.2.0 # via sphinx soupsieve==2.4.1 @@ -200,6 +276,8 @@ stevedore==5.0.0 # -r requirements/test.txt # code-annotations # doc8 + # edx-django-utils + # edx-opaque-keys text-unidecode==1.3 # via # -r requirements/test.txt @@ -214,14 +292,30 @@ tomli==2.0.1 # pytest twine==4.0.2 # via -r requirements/doc.in +types-pyyaml==6.0.12.9 + # via + # -r requirements/test.txt + # responses typing-extensions==4.5.0 # via # pydata-sphinx-theme # rich urllib3==1.26.15 # via + # -r requirements/test.txt # requests + # responses # twine +vine==5.0.0 + # via + # -r requirements/test.txt + # amqp + # celery + # kombu +wcwidth==0.2.6 + # via + # -r requirements/test.txt + # prompt-toolkit webencodings==0.5.1 # via bleach zipp==3.15.0 diff --git a/requirements/quality.txt b/requirements/quality.txt index 328a6ee..70c0acd 100644 --- a/requirements/quality.txt +++ b/requirements/quality.txt @@ -4,6 +4,10 @@ # # make upgrade # +amqp==5.1.1 + # via + # -r requirements/test.txt + # kombu asgiref==3.6.0 # via # -r requirements/test.txt @@ -12,14 +16,49 @@ astroid==2.15.4 # via # pylint # pylint-celery +billiard==3.6.4.0 + # via + # -r requirements/test.txt + # celery +celery==5.2.7 + # via -r requirements/test.txt +certifi==2022.12.7 + # via + # -r requirements/test.txt + # requests +cffi==1.15.1 + # via + # -r requirements/test.txt + # pynacl +charset-normalizer==3.1.0 + # via + # -r requirements/test.txt + # requests click==8.1.3 # via # -r requirements/test.txt + # celery + # click-didyoumean # click-log + # click-plugins + # click-repl # code-annotations + # edx-django-utils # edx-lint +click-didyoumean==0.3.0 + # via + # -r requirements/test.txt + # celery click-log==0.4.0 # via edx-lint +click-plugins==1.1.1 + # via + # -r requirements/test.txt + # celery +click-repl==0.2.0 + # via + # -r requirements/test.txt + # celery code-annotations==1.3.0 # via # -r requirements/test.txt @@ -30,16 +69,34 @@ coverage[toml]==7.2.5 # pytest-cov dill==0.3.6 # via pylint -django==3.2.18 +django==3.2.19 # via # -c https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt # -r requirements/test.txt + # django-crum + # edx-django-utils +django-crum==0.7.9 + # via + # -r requirements/test.txt + # edx-django-utils +django-waffle==3.0.0 + # via + # -r requirements/test.txt + # edx-django-utils +edx-django-utils==5.4.0 + # via -r requirements/test.txt edx-lint==5.3.4 # via -r requirements/quality.in +edx-opaque-keys==2.3.0 + # via -r requirements/test.txt exceptiongroup==1.1.1 # via # -r requirements/test.txt # pytest +idna==3.4 + # via + # -r requirements/test.txt + # requests iniconfig==2.0.0 # via # -r requirements/test.txt @@ -52,6 +109,10 @@ jinja2==3.1.2 # via # -r requirements/test.txt # code-annotations +kombu==5.2.4 + # via + # -r requirements/test.txt + # celery lazy-object-proxy==1.9.0 # via astroid markupsafe==2.1.2 @@ -60,6 +121,10 @@ markupsafe==2.1.2 # jinja2 mccabe==0.7.0 # via pylint +newrelic==8.8.0 + # via + # -r requirements/test.txt + # edx-django-utils packaging==23.1 # via # -r requirements/test.txt @@ -74,8 +139,20 @@ pluggy==1.0.0 # via # -r requirements/test.txt # pytest +prompt-toolkit==3.0.38 + # via + # -r requirements/test.txt + # click-repl +psutil==5.9.5 + # via + # -r requirements/test.txt + # edx-django-utils pycodestyle==2.10.0 # via -r requirements/quality.in +pycparser==2.21 + # via + # -r requirements/test.txt + # cffi pydocstyle==6.3.0 # via -r requirements/quality.in pylint==2.17.3 @@ -92,6 +169,14 @@ pylint-plugin-utils==0.7 # via # pylint-celery # pylint-django +pymongo==3.13.0 + # via + # -r requirements/test.txt + # edx-opaque-keys +pynacl==1.5.0 + # via + # -r requirements/test.txt + # edx-django-utils pytest==7.3.1 # via # -r requirements/test.txt @@ -108,13 +193,24 @@ python-slugify==8.0.1 pytz==2023.3 # via # -r requirements/test.txt + # celery # django pyyaml==6.0 # via # -r requirements/test.txt # code-annotations + # responses +requests==2.29.0 + # via + # -r requirements/test.txt + # responses +responses==0.23.1 + # via -r requirements/test.txt six==1.16.0 - # via edx-lint + # via + # -r requirements/test.txt + # click-repl + # edx-lint snowballstemmer==2.2.0 # via pydocstyle sqlparse==0.4.4 @@ -125,6 +221,8 @@ stevedore==5.0.0 # via # -r requirements/test.txt # code-annotations + # edx-django-utils + # edx-opaque-keys text-unidecode==1.3 # via # -r requirements/test.txt @@ -137,9 +235,28 @@ tomli==2.0.1 # pytest tomlkit==0.11.8 # via pylint +types-pyyaml==6.0.12.9 + # via + # -r requirements/test.txt + # responses typing-extensions==4.5.0 # via # astroid # pylint +urllib3==1.26.15 + # via + # -r requirements/test.txt + # requests + # responses +vine==5.0.0 + # via + # -r requirements/test.txt + # amqp + # celery + # kombu +wcwidth==0.2.6 + # via + # -r requirements/test.txt + # prompt-toolkit wrapt==1.15.0 # via astroid diff --git a/requirements/test.in b/requirements/test.in index 6797160..bb78ea0 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -6,3 +6,4 @@ pytest-cov # pytest extension for code coverage statistics pytest-django # pytest extension for better Django support code-annotations # provides commands used by the pii_check make target. +responses # mocks for the requests library diff --git a/requirements/test.txt b/requirements/test.txt index 66eec31..dd30475 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -4,12 +4,53 @@ # # make upgrade # +amqp==5.1.1 + # via + # -r requirements/base.txt + # kombu asgiref==3.6.0 # via # -r requirements/base.txt # django +billiard==3.6.4.0 + # via + # -r requirements/base.txt + # celery +celery==5.2.7 + # via -r requirements/base.txt +certifi==2022.12.7 + # via + # -r requirements/base.txt + # requests +cffi==1.15.1 + # via + # -r requirements/base.txt + # pynacl +charset-normalizer==3.1.0 + # via + # -r requirements/base.txt + # requests click==8.1.3 - # via code-annotations + # via + # -r requirements/base.txt + # celery + # click-didyoumean + # click-plugins + # click-repl + # code-annotations + # edx-django-utils +click-didyoumean==0.3.0 + # via + # -r requirements/base.txt + # celery +click-plugins==1.1.1 + # via + # -r requirements/base.txt + # celery +click-repl==0.2.0 + # via + # -r requirements/base.txt + # celery code-annotations==1.3.0 # via -r requirements/test.in coverage[toml]==7.2.5 @@ -17,20 +58,68 @@ coverage[toml]==7.2.5 # via # -c https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt # -r requirements/base.txt + # django-crum + # edx-django-utils +django-crum==0.7.9 + # via + # -r requirements/base.txt + # edx-django-utils +django-waffle==3.0.0 + # via + # -r requirements/base.txt + # edx-django-utils +edx-django-utils==5.4.0 + # via -r requirements/base.txt +edx-opaque-keys==2.3.0 + # via -r requirements/base.txt exceptiongroup==1.1.1 # via pytest +idna==3.4 + # via + # -r requirements/base.txt + # requests iniconfig==2.0.0 # via pytest jinja2==3.1.2 # via code-annotations +kombu==5.2.4 + # via + # -r requirements/base.txt + # celery markupsafe==2.1.2 # via jinja2 +newrelic==8.8.0 + # via + # -r requirements/base.txt + # edx-django-utils packaging==23.1 # via pytest pbr==5.11.1 - # via stevedore + # via + # -r requirements/base.txt + # stevedore pluggy==1.0.0 # via pytest +prompt-toolkit==3.0.38 + # via + # -r requirements/base.txt + # click-repl +psutil==5.9.5 + # via + # -r requirements/base.txt + # edx-django-utils +pycparser==2.21 + # via + # -r requirements/base.txt + # cffi +pymongo==3.13.0 + # via + # -r requirements/base.txt + # edx-opaque-keys +pynacl==1.5.0 + # via + # -r requirements/base.txt + # edx-django-utils pytest==7.3.1 # via # pytest-cov @@ -44,18 +133,52 @@ python-slugify==8.0.1 pytz==2023.3 # via # -r requirements/base.txt + # celery # django pyyaml==6.0 - # via code-annotations + # via + # code-annotations + # responses +requests==2.29.0 + # via + # -r requirements/base.txt + # responses +responses==0.23.1 + # via -r requirements/test.in +six==1.16.0 + # via + # -r requirements/base.txt + # click-repl sqlparse==0.4.4 # via # -r requirements/base.txt # django stevedore==5.0.0 - # via code-annotations + # via + # -r requirements/base.txt + # code-annotations + # edx-django-utils + # edx-opaque-keys text-unidecode==1.3 # via python-slugify tomli==2.0.1 # via # coverage # pytest +types-pyyaml==6.0.12.9 + # via responses +urllib3==1.26.15 + # via + # -r requirements/base.txt + # requests + # responses +vine==5.0.0 + # via + # -r requirements/base.txt + # amqp + # celery + # kombu +wcwidth==0.2.6 + # via + # -r requirements/base.txt + # prompt-toolkit diff --git a/setup.py b/setup.py index ea1feb7..55e68c3 100755 --- a/setup.py +++ b/setup.py @@ -112,7 +112,13 @@ def is_requirement(line): include=['event_sink_clickhouse', 'event_sink_clickhouse.*'], exclude=["*tests"], ), - + entry_points={ + "lms.djangoapp": [ + ], + "cms.djangoapp": [ + "event-sink-clickhouse = event_sink_clickhouse.apps:EventSinkClickhouseConfig", + ], + }, include_package_data=True, install_requires=load_requirements('requirements/base.in'), python_requires=">=3.8", diff --git a/test_utils/helpers.py b/test_utils/helpers.py new file mode 100644 index 0000000..b1de1fd --- /dev/null +++ b/test_utils/helpers.py @@ -0,0 +1,210 @@ +""" +Helper functions for tests +""" + +import csv +import random +import string +from datetime import datetime +from io import StringIO +from unittest.mock import MagicMock, Mock + +from opaque_keys.edx.keys import CourseKey +from opaque_keys.edx.locator import BlockUsageLocator + +from event_sink_clickhouse.sinks.course_published import CoursePublishedSink + +ORIG_IMPORT = __import__ +ORG = "testorg" +COURSE = "testcourse" +COURSE_RUN = "2023_Fall" + + +class FakeXBlock: + """ + Fakes the parameters of an XBlock that we care about. + """ + def __init__(self, identifier, detached_block=False): + self.block_type = "course_info" if detached_block else "vertical" + self.scope_ids = Mock() + self.scope_ids.usage_id.course_key = course_key_factory() + self.scope_ids.block_type = self.block_type + self.location = block_usage_locator_factory() + self.display_name_with_default = f"Display name {identifier}" + self.edited_on = datetime.now() + self.children = [] + + def get_children(self): + """ + Fakes the method of the same name from an XBlock. + """ + return self.children + + +def course_str_factory(): + """ + Return a valid course key string. + """ + course_str = f"course-v1:{ORG}+{COURSE}+{COURSE_RUN}" + return course_str + + +def course_key_factory(): + """ + Return a CourseKey object from our course key string. + """ + return CourseKey.from_string(course_str_factory()) + + +def block_usage_locator_factory(): + """ + Create a BlockUsageLocator with a random id. + """ + block_id = ''.join(random.choices(string.ascii_letters, k=10)) + return BlockUsageLocator(course_key_factory(), block_type="category", block_id=block_id, deprecated=True) + + +def mock_course_overview(): + """ + Create a fake CourseOverview object that supports just the things we care about. + """ + mock_overview = MagicMock() + mock_overview.get_from_id = MagicMock() + mock_overview.get_from_id.return_value.modified = datetime.now() + return mock_overview + + +def mock_detached_xblock_types(): + """ + Mock the return results of xmodule.modulestore.store_utilities.DETACHED_XBLOCK_TYPES + """ + # Current values as of 2023-05-01 + return {'static_tab', 'about', 'course_info'} + + +def get_clickhouse_http_params(): + """ + Get the params used in ClickHouse queries. + """ + blocks_params = { + "input_format_allow_errors_num": 1, + "input_format_allow_errors_ratio": 0.1, + "query": "INSERT INTO cool_data.course_blocks FORMAT CSV" + } + relationships_params = { + "input_format_allow_errors_num": 1, + "input_format_allow_errors_ratio": 0.1, + "query": "INSERT INTO cool_data.course_relationships FORMAT CSV" + } + + return blocks_params, relationships_params + + +def course_factory(): + """ + Return a fake course structure that exercises most of the serialization features. + """ + # Create a base block + top_block = FakeXBlock("top") + course = [top_block, ] + + # Create a few children + for i in range(3): + block = FakeXBlock(f"Child {i}") + course.append(block) + top_block.children.append(block) + + # Create grandchildren on some children + if i > 0: + sub_block = FakeXBlock(f"Grandchild {i}") + course.append(sub_block) + block.children.append(sub_block) + + # Create some detached blocks at the top level + for i in range(3): + course.append(FakeXBlock(f"Detached {i}", detached_block=True)) + + return course + + +def check_block_csv_matcher(course): + """ + Match the course structure CSV against the test course. + + This is a matcher for the "responses" library. It returns a function + that actually does the matching. + """ + def match(request): + body = request.body + lines = body.split("\n")[:-1] + + # There should be one CSV line for each block in the test course + if len(lines) != len(course): + return False, f"Body has {len(lines)} lines, course has {len(course)}" + + f = StringIO(body) + reader = csv.reader(f) + + i = 0 + try: + # The CSV should be in the same order as our course, make sure + # everything matches + for row in reader: + block = course[i] + assert row[0] == block.location.org + assert row[1] == str(block.location.course_key) + assert row[2] == block.location.course + assert row[3] == block.location.run + assert row[4] == str(course[i].location) + assert row[5] == block.display_name_with_default + assert row[6] == str(block.block_type) + i += 1 + except AssertionError as e: + return False, f"Mismatch in row {i}: {e}" + return True, "" + return match + + +def check_relationship_csv_matcher(course): + """ + Match the relationship CSV against the test course. + + This is a matcher for the "responses" library. It returns a function + that actually does the matching. + """ + # Build our own copy of the test relationships first + relationships = [] + for block in course: + course_key = str(block.location.course_key) + for _, child in enumerate(block.get_children()): + parent_node = str(CoursePublishedSink.strip_branch_and_version(block.location)) + child_node = str(CoursePublishedSink.strip_branch_and_version(child.location)) + relationships.append((course_key, parent_node, child_node)) + + def match(request): + body = request.body + lines = body.split("\n")[:-1] + + # The relationships CSV should have the same number of relationships as our test + if len(lines) != len(relationships): + return False, f"Body has {len(lines)} lines but there are {len(relationships)} relationships" + + f = StringIO(body) + reader = csv.reader(f) + + i = 0 + try: + # The CSV should be in the same order as our relationships, make sure + # everything matches + for row in reader: + print(row) + print(relationships[i]) + relation = relationships[i] + assert row[0] == relation[0] + assert row[1] == relation[1] + assert row[2] == relation[2] + i += 1 + except AssertionError as e: + return False, f"Mismatch in row {i}: {e}" + return True, "" + return match diff --git a/tests/test_course_published.py b/tests/test_course_published.py new file mode 100644 index 0000000..037be6a --- /dev/null +++ b/tests/test_course_published.py @@ -0,0 +1,120 @@ +""" +Tests for the course_published sinks. +""" +import logging +from datetime import datetime +from unittest.mock import patch + +import pytest +import requests +import responses +from responses import matchers +from responses.registries import OrderedRegistry + +from event_sink_clickhouse.sinks.course_published import CoursePublishedSink +from event_sink_clickhouse.tasks import dump_course_to_clickhouse +from test_utils.helpers import ( + check_block_csv_matcher, + check_relationship_csv_matcher, + course_factory, + course_str_factory, + get_clickhouse_http_params, + mock_course_overview, + mock_detached_xblock_types, +) + + +@responses.activate(registry=OrderedRegistry) # pylint: disable=unexpected-keyword-arg,no-value-for-parameter +@patch("event_sink_clickhouse.sinks.course_published.CoursePublishedSink._get_detached_xblock_types") +@patch("event_sink_clickhouse.sinks.course_published.CoursePublishedSink._get_modulestore") +def test_course_publish_success(mock_modulestore, mock_detached, caplog): + """ + Test of a successful end-to-end run. + """ + # Necessary to get logs from the task + caplog.set_level(logging.INFO, logger="edx.celery.task") + + # Create a fake course structure with a few fake XBlocks + course = course_factory() + mock_modulestore.return_value.get_items.return_value = course + + # Fake the "detached types" list since we can't import it here + mock_detached.return_value = mock_detached_xblock_types() + + # Use the responses library to catch the POSTs to ClickHouse + # and match them against the expected values, including CSV + # content + blocks_params, relationships_params = get_clickhouse_http_params() + + responses.post( + "https://foo.bar/", + match=[ + matchers.query_param_matcher(blocks_params), + check_block_csv_matcher(course) + ], + ) + responses.post( + "https://foo.bar/", + match=[ + matchers.query_param_matcher(relationships_params), + check_relationship_csv_matcher(course) + ], + ) + + course = course_str_factory() + dump_course_to_clickhouse(course) + + # Just to make sure we're not calling things more than we need to + assert mock_modulestore.call_count == 1 + assert mock_detached.call_count == 1 + + +@responses.activate(registry=OrderedRegistry) # pylint: disable=unexpected-keyword-arg,no-value-for-parameter +@patch("event_sink_clickhouse.sinks.course_published.CoursePublishedSink._get_detached_xblock_types") +@patch("event_sink_clickhouse.sinks.course_published.CoursePublishedSink._get_modulestore") +def test_course_publish_clickhouse_error(mock_modulestore, mock_detached, caplog): + """ + Test the case where a ClickHouse POST fails. + """ + caplog.set_level(logging.INFO, logger="edx.celery.task") + course = course_factory() + mock_modulestore.return_value.get_items.return_value = course + mock_detached.return_value = mock_detached_xblock_types() + + # This will raise an exception when we try to post to ClickHouse + responses.post( + "https://foo.bar/", + body="Test Bad Request error", + status=400 + ) + + course = course_str_factory() + + with pytest.raises(requests.exceptions.RequestException): + dump_course_to_clickhouse(course) + + # Make sure everything was called as we expect + assert mock_modulestore.call_count == 1 + assert mock_detached.call_count == 1 + + # Make sure our log messages went through. + assert "Test Bad Request error" in caplog.text + assert f"Error trying to dump course {course} to ClickHouse!" in caplog.text + + +@patch("event_sink_clickhouse.sinks.course_published.CoursePublishedSink._get_course_overview_model") +def test_get_course_last_published(mock_overview): + """ + This function isn't in use yet, but we'll need it for the management command + """ + # Create a fake course overview, which will return a datetime object + course = mock_course_overview() + mock_overview.return_value = course + + # Request our course last published date + course_key = course_str_factory() + + # Confirm that the string date we get back is a valid date + last_published_date = CoursePublishedSink.get_course_last_published(course_key) + dt = datetime.strptime(last_published_date, "%Y-%m-%d %H:%M:%S.%f") + assert dt diff --git a/tests/test_django_settings.py b/tests/test_django_settings.py new file mode 100644 index 0000000..52ff506 --- /dev/null +++ b/tests/test_django_settings.py @@ -0,0 +1,55 @@ +""" +Test plugin settings +""" + +from django.conf import settings +from django.test import TestCase + +from event_sink_clickhouse.settings import common as common_settings +from event_sink_clickhouse.settings import production as production_setttings + + +class TestPluginSettings(TestCase): + """ + Tests plugin settings + """ + + def test_common_settings(self): + """ + Test common settings + """ + common_settings.plugin_settings(settings) + + for key in ("url", "username", "password", "database", "timeout_secs"): + assert key in settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG + + def test_production_settings(self): + """ + Test production settings + """ + test_url = "https://foo.bar" + test_username = "bob" + test_password = "secret" + test_database = "cool_data" + test_timeout = 1 + + settings.ENV_TOKENS = { + 'EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG': { + "url": test_url, + "username": test_username, + "password": test_password, + "database": test_database, + "timeout_secs": test_timeout + } + } + production_setttings.plugin_settings(settings) + + for key, val in ( + ("url", test_url), + ("username", test_username), + ("password", test_password), + ("database", test_database), + ("timeout_secs", test_timeout), + ): + assert key in settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG + assert settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG[key] == val