Skip to content
This repository has been archived by the owner on Apr 3, 2024. It is now read-only.

Add tree location and completion_mode to XBlock serialization #49

Merged
merged 4 commits into from
Oct 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion event_sink_clickhouse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
A sink for Open edX events to send them to ClickHouse.
"""

__version__ = "0.3.0"
__version__ = "0.4.0"
23 changes: 22 additions & 1 deletion event_sink_clickhouse/sinks/course_published.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ def serialize_item(self, item, many=False, initial=None):
# Serialize the XBlocks to dicts and map them with their location as keys the
# whole map needs to be completed before we can define relationships
index = 0
section_idx = 0
subsection_idx = 0
unit_idx = 0

for block in items:
index += 1
fields = self.serialize_xblock(
Expand All @@ -88,6 +92,22 @@ def serialize_item(self, item, many=False, initial=None):
initial["dump_id"],
initial["time_last_dumped"],
)

if fields["xblock_data_json"]["block_type"] == "chapter":
section_idx += 1
subsection_idx = 0
unit_idx = 0
elif fields["xblock_data_json"]["block_type"] == "sequential":
subsection_idx += 1
unit_idx = 0
elif fields["xblock_data_json"]["block_type"] == "vertical":
unit_idx += 1

fields["xblock_data_json"]["section"] = section_idx
fields["xblock_data_json"]["subsection"] = subsection_idx
fields["xblock_data_json"]["unit"] = unit_idx

fields["xblock_data_json"] = json.dumps(fields["xblock_data_json"])
location_to_node[
XBlockSink.strip_branch_and_version(block.location)
] = fields
Expand Down Expand Up @@ -147,6 +167,7 @@ def serialize_xblock(
"block_type": block_type,
"detached": 1 if block_type in detached_xblock_types else 0,
"graded": 1 if getattr(item, "graded", False) else 0,
"completion_mode": getattr(item, "completion_mode", ""),
}

# Core table data, if things change here it's a big deal.
Expand All @@ -155,7 +176,7 @@ def serialize_xblock(
"course_key": str(course_key),
"location": str(item.location),
"display_name": item.display_name_with_default.replace("'", "'"),
"xblock_data_json": json.dumps(json_data),
"xblock_data_json": json_data,
"order": index,
"edited_on": str(getattr(item, "edited_on", "")),
"dump_id": dump_id,
Expand Down
67 changes: 57 additions & 10 deletions test_utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ class FakeXBlock:
"""
Fakes the parameters of an XBlock that we care about.
"""
def __init__(self, identifier, detached_block=False, graded=False):
self.block_type = "course_info" if detached_block else "vertical"
def __init__(self, identifier, block_type="vertical", graded=False, completion_mode="unknown"):
self.block_type = block_type
self.scope_ids = Mock()
self.scope_ids.usage_id.course_key = course_key_factory()
self.scope_ids.block_type = self.block_type
Expand All @@ -62,6 +62,7 @@ def __init__(self, identifier, detached_block=False, graded=False):
self.edited_on = datetime.now()
self.children = []
self.graded = graded
self.completion_mode = completion_mode

def get_children(self):
"""
Expand Down Expand Up @@ -124,6 +125,40 @@ def fake_course_overview_factory(modified=None):
)


def fake_serialize_fake_course_overview(course_overview):
"""
Return a dict representation of a FakeCourseOverview.
"""
json_fields = {
"advertised_start": str(course_overview.advertised_start),
"announcement": str(course_overview.announcement),
"lowest_passing_grade": float(course_overview.lowest_passing_grade),
"invitation_only": course_overview.invitation_only,
"max_student_enrollments_allowed": course_overview.max_student_enrollments_allowed,
"effort": course_overview.effort,
"enable_proctored_exams": course_overview.enable_proctored_exams,
"entrance_exam_enabled": course_overview.entrance_exam_enabled,
"external_id": course_overview.external_id,
"language": course_overview.language,
}

return {
"org": course_overview.org,
"course_key": str(course_overview.id),
"display_name": course_overview.display_name,
"course_start": course_overview.start,
"course_end": course_overview.end,
"enrollment_start": course_overview.enrollment_start,
"enrollment_end": course_overview.enrollment_end,
"self_paced": course_overview.self_paced,
"course_data_json": json.dumps(json_fields),
"created": course_overview.created,
"modified": course_overview.modified,
"dump_id": "",
"time_last_dumped": "",
}


def mock_course_overview():
"""
Create a fake CourseOverview object that supports just the things we care about.
Expand Down Expand Up @@ -170,29 +205,41 @@ def course_factory():
Return a fake course structure that exercises most of the serialization features.
"""
# Create a base block
top_block = FakeXBlock("top")
top_block = FakeXBlock("top", block_type="course")
course = [top_block, ]

# Create a few children
# Create a few sections
for i in range(3):
block = FakeXBlock(f"Child {i}")
block = FakeXBlock(f"Section {i}", block_type="chapter")
course.append(block)
top_block.children.append(block)

# Create grandchildren on some children
# Create some subsections
if i > 0:
sub_block = FakeXBlock(f"Grandchild {i}")
course.append(sub_block)
block.children.append(sub_block)
for ii in range(3):
sub_block = FakeXBlock(f"Subsection {ii}", block_type="sequential")
course.append(sub_block)
block.children.append(sub_block)

for iii in range(3):
# Create some units
unit_block = FakeXBlock(f"Unit {iii}", block_type="vertical")
course.append(unit_block)
sub_block.children.append(unit_block)

# Create some detached blocks at the top level
for i in range(3):
course.append(FakeXBlock(f"Detached {i}", detached_block=True))
course.append(FakeXBlock(f"Detached {i}", block_type="course_info"))

# Create some graded blocks at the top level
for i in range(3):
course.append(FakeXBlock(f"Graded {i}", graded=True))

# Create some completable blocks at the top level
course.append(FakeXBlock("Completable", completion_mode="completable"))
course.append(FakeXBlock("Aggregator", completion_mode="aggregator"))
course.append(FakeXBlock("Excluded", completion_mode="excluded"))

return course


Expand Down
167 changes: 109 additions & 58 deletions tests/test_course_published.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import json
import logging
from datetime import datetime
from unittest.mock import patch
from unittest.mock import MagicMock, patch

import pytest
import requests
Expand All @@ -13,7 +13,7 @@
from responses import matchers
from responses.registries import OrderedRegistry

from event_sink_clickhouse.sinks.course_published import CourseOverviewSink
from event_sink_clickhouse.sinks.course_published import CourseOverviewSink, XBlockSink
from event_sink_clickhouse.tasks import dump_course_to_clickhouse
from test_utils.helpers import (
check_block_csv_matcher,
Expand All @@ -22,6 +22,7 @@
course_factory,
course_str_factory,
fake_course_overview_factory,
fake_serialize_fake_course_overview,
get_clickhouse_http_params,
mock_course_overview,
mock_detached_xblock_types,
Expand All @@ -43,34 +44,7 @@ def test_course_publish_success(mock_modulestore, mock_detached, mock_overview,
course_overview = fake_course_overview_factory(modified=datetime.now())
mock_modulestore.return_value.get_items.return_value = course

json_fields = {
"advertised_start": str(course_overview.advertised_start),
"announcement": str(course_overview.announcement),
"lowest_passing_grade": float(course_overview.lowest_passing_grade),
"invitation_only": course_overview.invitation_only,
"max_student_enrollments_allowed": course_overview.max_student_enrollments_allowed,
"effort": course_overview.effort,
"enable_proctored_exams": course_overview.enable_proctored_exams,
"entrance_exam_enabled": course_overview.entrance_exam_enabled,
"external_id": course_overview.external_id,
"language": course_overview.language,
}

mock_serialize_item.return_value = {
"org": course_overview.org,
"course_key": str(course_overview.id),
"display_name": course_overview.display_name,
"course_start": course_overview.start,
"course_end": course_overview.end,
"enrollment_start": course_overview.enrollment_start,
"enrollment_end": course_overview.enrollment_end,
"self_paced": course_overview.self_paced,
"course_data_json": json.dumps(json_fields),
"created": course_overview.created,
"modified": course_overview.modified,
"dump_id": "",
"time_last_dumped": "",
}
mock_serialize_item.return_value = fake_serialize_fake_course_overview(course_overview)

# Fake the "detached types" list since we can't import it here
mock_detached.return_value = mock_detached_xblock_types()
Expand Down Expand Up @@ -129,34 +103,7 @@ def test_course_publish_clickhouse_error(mock_modulestore, mock_detached, mock_o
course_overview = fake_course_overview_factory(modified=datetime.now())
mock_overview.return_value.get_from_id.return_value = course_overview

json_fields = {
"advertised_start": str(course_overview.advertised_start),
"announcement": str(course_overview.announcement),
"lowest_passing_grade": float(course_overview.lowest_passing_grade),
"invitation_only": course_overview.invitation_only,
"max_student_enrollments_allowed": course_overview.max_student_enrollments_allowed,
"effort": course_overview.effort,
"enable_proctored_exams": course_overview.enable_proctored_exams,
"entrance_exam_enabled": course_overview.entrance_exam_enabled,
"external_id": course_overview.external_id,
"language": course_overview.language,
}

mock_serialize_item.return_value = {
"org": course_overview.org,
"course_key": str(course_overview.id),
"display_name": course_overview.display_name,
"course_start": course_overview.start,
"course_end": course_overview.end,
"enrollment_start": course_overview.enrollment_start,
"enrollment_end": course_overview.enrollment_end,
"self_paced": course_overview.self_paced,
"course_data_json": json.dumps(json_fields),
"created": course_overview.created,
"modified": course_overview.modified,
"dump_id": "",
"time_last_dumped": "",
}
mock_serialize_item.return_value = fake_serialize_fake_course_overview(course_overview)

# This will raise an exception when we try to post to ClickHouse
responses.post(
Expand Down Expand Up @@ -265,3 +212,107 @@ def test_get_last_dump_time():
last_published_date = sink.get_last_dumped_timestamp(course_key)
dt = datetime.strptime(last_published_date, "%Y-%m-%d %H:%M:%S.%f+00:00")
assert dt


@patch("event_sink_clickhouse.sinks.course_published.get_detached_xblock_types")
@patch("event_sink_clickhouse.sinks.course_published.get_modulestore")
# pytest:disable=unused-argument
def test_xblock_tree_structure(mock_modulestore, mock_detached):
"""
Test that our calculations of section/subsection/unit are correct.
"""
# Create a fake course structure with a few fake XBlocks
course = course_factory()
course_overview = fake_course_overview_factory(modified=datetime.now())
mock_modulestore.return_value.get_items.return_value = course

# Fake the "detached types" list since we can't import it here
mock_detached.return_value = mock_detached_xblock_types()

fake_serialized_course_overview = fake_serialize_fake_course_overview(course_overview)
sink = XBlockSink(connection_overrides={}, log=MagicMock())

# Remove the relationships sink, we're just checking the structure here.
sink.serialize_relationships = MagicMock()
initial_data = {"dump_id": "xyz", "time_last_dumped": "2023-09-05"}
results = sink.serialize_item(fake_serialized_course_overview, initial=initial_data)

def _check_tree_location(block, expected_section=0, expected_subsection=0, expected_unit=0):
"""
Assert the expected values in certain returned blocks or print useful debug information.
"""
try:
j = json.loads(block["xblock_data_json"])
assert j["section"] == expected_section
assert j["subsection"] == expected_subsection
assert j["unit"] == expected_unit
except AssertionError as e:
print(e)
print(block)
raise

# The tree has new sections at these indexes
_check_tree_location(results[1], 1)
_check_tree_location(results[2], 2)
_check_tree_location(results[15], 3)

# The tree has new subsections at these indexes
_check_tree_location(results[3], 2, 1)
_check_tree_location(results[7], 2, 2)
_check_tree_location(results[11], 2, 3)
_check_tree_location(results[24], 3, 3)

# The tree has new units at these indexes
_check_tree_location(results[4], 2, 1, 1)
_check_tree_location(results[5], 2, 1, 2)
_check_tree_location(results[6], 2, 1, 3)
_check_tree_location(results[10], 2, 2, 3)
_check_tree_location(results[25], 3, 3, 1)
_check_tree_location(results[26], 3, 3, 2)
_check_tree_location(results[27], 3, 3, 3)


@patch("event_sink_clickhouse.sinks.course_published.get_detached_xblock_types")
@patch("event_sink_clickhouse.sinks.course_published.get_modulestore")
def test_xblock_graded_completable_mode(mock_modulestore, mock_detached):
"""
Test that our grading and completion fields serialize.
"""
# Create a fake course structure with a few fake XBlocks
course = course_factory()
course_overview = fake_course_overview_factory(modified=datetime.now())
mock_modulestore.return_value.get_items.return_value = course

# Fake the "detached types" list since we can't import it here
mock_detached.return_value = mock_detached_xblock_types()

fake_serialized_course_overview = fake_serialize_fake_course_overview(course_overview)
sink = XBlockSink(connection_overrides={}, log=MagicMock())

# Remove the relationships sink, we're just checking the structure here.
sink.serialize_relationships = MagicMock()
initial_data = {"dump_id": "xyz", "time_last_dumped": "2023-09-05"}
results = sink.serialize_item(fake_serialized_course_overview, initial=initial_data)

def _check_item_serialized_location(block, expected_graded=0, expected_completion_mode="unknown"):
"""
Assert the expected values in certain returned blocks or print useful debug information.
"""
try:
j = json.loads(block["xblock_data_json"])
assert j["graded"] == expected_graded
assert j["completion_mode"] == expected_completion_mode
except AssertionError as e:
print(e)
print(block)
raise

# These tree indexes are the only ones which should have gradable set
_check_item_serialized_location(results[31], 1)
_check_item_serialized_location(results[32], 1)
_check_item_serialized_location(results[33], 1)

# These tree indexes are the only ones which should have non-"unknown" completion_modes.
_check_item_serialized_location(results[34], 0, "completable")
_check_item_serialized_location(results[35], 0, "aggregator")
_check_item_serialized_location(results[36], 0, "excluded")
Loading