Skip to content
This repository has been archived by the owner on Apr 3, 2024. It is now read-only.

Commit

Permalink
fix: use course_overview object in course_published dump
Browse files Browse the repository at this point in the history
test: add test for course_published changes

chore: quality fixes

test: add test for get_queryset changes in sinks

test: add test for should_dump_item in course_published

chore: use invalid emails

chore: quality fixes
  • Loading branch information
Ian2012 committed Feb 12, 2024
1 parent 9976c0a commit a43189d
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 33 deletions.
11 changes: 5 additions & 6 deletions event_sink_clickhouse/sinks/course_published.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ class CourseOverviewSink(ModelBaseSink): # pylint: disable=abstract-method
nested_sinks = [XBlockSink]
pk_format = str

def should_dump_item(self, unique_key):
def should_dump_item(self, item):
"""
Only dump the course if it's been changed since the last time it's been
dumped.
Expand All @@ -162,14 +162,14 @@ def should_dump_item(self, unique_key):
- reason why course needs, or does not need, to be dumped (string)
"""

course_last_dump_time = self.get_last_dumped_timestamp(unique_key)
course_last_dump_time = self.get_last_dumped_timestamp(item)

# If we don't have a record of the last time this command was run,
# we should serialize the course and dump it
if course_last_dump_time is None:
return True, "Course is not present in ClickHouse"

course_last_published_date = self.get_course_last_published(unique_key)
course_last_published_date = self.get_course_last_published(item)

# If we've somehow dumped this course but there is no publish date
# skip it
Expand Down Expand Up @@ -197,7 +197,7 @@ def should_dump_item(self, unique_key):
)
return needs_dump, reason

def get_course_last_published(self, course_key):
def get_course_last_published(self, course_overview):
"""
Get approximate last publish date for the given course.
We use the 'modified' column in the CourseOverview table as a quick and easy
Expand All @@ -211,8 +211,7 @@ def get_course_last_published(self, course_key):
is sortable and similar to ISO 8601:
https://docs.python.org/3/library/datetime.html#datetime.date.__str__
"""
CourseOverview = self.get_model()
approx_last_published = CourseOverview.get_from_id(course_key).modified
approx_last_published = course_overview.modified
if approx_last_published:
return str(approx_last_published)

Expand Down
3 changes: 1 addition & 2 deletions test_utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,7 @@ def mock_course_overview():
Create a fake CourseOverview object that supports just the things we care about.
"""
mock_overview = MagicMock()
mock_overview.get_from_id = MagicMock()
mock_overview.get_from_id.return_value = fake_course_overview_factory(datetime.now())
mock_overview.return_value = fake_course_overview_factory(datetime.now())
return mock_overview


Expand Down
12 changes: 6 additions & 6 deletions tests/commands/test_dump_data_to_clickhouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,18 +77,18 @@ class DummySink(ModelBaseSink):

def get_queryset(self, start_pk=None):
qs = MockSet(
MockModel(mock_name="john", email="john@edx.com", pk=1),
MockModel(mock_name="jeff", email="jeff@edx.com", pk=2),
MockModel(mock_name="bill", email="bill@edx.com", pk=3),
MockModel(mock_name="joe", email="joe@edx.com", pk=4),
MockModel(mock_name="jim", email="jim@edx.com", pk=5),
MockModel(mock_name="john", email="john@test.invalid", pk=1),
MockModel(mock_name="jeff", email="jeff@test.invalid", pk=2),
MockModel(mock_name="bill", email="bill@test.invalid", pk=3),
MockModel(mock_name="joe", email="joe@test.invalid", pk=4),
MockModel(mock_name="jim", email="jim@test.invalid", pk=5),
)
if start_pk:
qs = qs.filter(pk__gt=start_pk)
return qs

def should_dump_item(self, unique_key):
return unique_key.pk!=1, "No reason"
return unique_key.pk != 1, "No reason"

def send_item_and_log(self, item_id, serialized_item, many):
pass
Expand Down
48 changes: 29 additions & 19 deletions tests/test_course_published.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
fake_course_overview_factory,
fake_serialize_fake_course_overview,
get_clickhouse_http_params,
mock_course_overview,
mock_detached_xblock_types,
)

Expand Down Expand Up @@ -114,40 +113,28 @@ def test_course_publish_clickhouse_error(mock_modulestore, mock_detached, mock_o
assert f"Error trying to dump Course Overview {course} to ClickHouse!" in caplog.text


@patch("event_sink_clickhouse.sinks.course_published.CourseOverviewSink.get_model")
def test_get_course_last_published(mock_overview):
def test_get_course_last_published():
"""
Make sure we get a valid date back from this in the expected format.
"""
# Create a fake course overview, which will return a datetime object
course = mock_course_overview()
mock_overview.return_value = course

# Request our course last published date
course_key = course_str_factory()
course_overview = fake_course_overview_factory(modified=datetime.now())

# Confirm that the string date we get back is a valid date
last_published_date = CourseOverviewSink(None, None).get_course_last_published(course_key)
last_published_date = CourseOverviewSink(None, None).get_course_last_published(course_overview)
dt = datetime.strptime(last_published_date, "%Y-%m-%d %H:%M:%S.%f")
assert dt


@responses.activate(registry=OrderedRegistry) # pylint: disable=unexpected-keyword-arg,no-value-for-parameter
@patch("event_sink_clickhouse.sinks.course_published.CourseOverviewSink.get_model")
def test_no_last_published_date(mock_overview):
def test_no_last_published_date():
"""
Test that we get a None value back for courses that don't have a modified date.
In some cases there is not modified date on a course. In coursegraph we
skipped these if they are already in the database, so we're continuing this trend here.
"""
# Fake a course with no modified date
course = mock_course_overview()
mock_overview.return_value = course
mock_overview.return_value.get_from_id.return_value = fake_course_overview_factory(modified=None)

# Request our course last published date
course_key = course_str_factory()
course_overview = fake_course_overview_factory(modified=None)

# should_dump_course will reach out to ClickHouse for the last dump date
# we'll fake the response here to have any date, such that we'll exercise
Expand All @@ -159,12 +146,35 @@ def test_no_last_published_date(mock_overview):

# Confirm that the string date we get back is a valid date
sink = CourseOverviewSink(connection_overrides={}, log=logging.getLogger())
should_dump_course, reason = sink.should_dump_item(course_key)
should_dump_course, reason = sink.should_dump_item(course_overview)

assert should_dump_course is False
assert reason == "No last modified date in CourseOverview"


@responses.activate(registry=OrderedRegistry) # pylint: disable=unexpected-keyword-arg,no-value-for-parameter
def test_should_dump_item():
"""
Test that we get the expected results from should_dump_item.
"""
course_overview = fake_course_overview_factory(modified=datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f+00:00"))

# should_dump_course will reach out to ClickHouse for the last dump date
# we'll fake the response here to have any date, such that we'll exercise
# all the "no modified date" code.
responses.get(
"https://foo.bar/",
body="2023-05-03 15:47:39.331024+00:00"
)

# Confirm that the string date we get back is a valid date
sink = CourseOverviewSink(connection_overrides={}, log=logging.getLogger())
should_dump_course, reason = sink.should_dump_item(course_overview)

assert should_dump_course is True
assert "Course has been published since last dump time - " in reason


@responses.activate(registry=OrderedRegistry) # pylint: disable=unexpected-keyword-arg,no-value-for-parameter
def test_course_not_present_in_clickhouse():
"""
Expand Down
20 changes: 20 additions & 0 deletions tests/test_external_id_sink.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""
Test the external_id_sink module.
"""

from unittest.mock import patch

from event_sink_clickhouse.sinks.external_id_sink import ExternalIdSink


@patch("event_sink_clickhouse.sinks.external_id_sink.ModelBaseSink.get_queryset")
def test_get_queryset(mock_get_queryset):
"""
Test the get_queryset method.
"""
sink = ExternalIdSink(None, None)

sink.get_queryset()

mock_get_queryset.assert_called_once_with(None)
mock_get_queryset.return_value.select_related.assert_called_once_with("user", "external_id_type")
20 changes: 20 additions & 0 deletions tests/test_user_profile_sink.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""
Test the external_id_sink module.
"""

from unittest.mock import patch

from event_sink_clickhouse.sinks.user_profile_sink import UserProfileSink


@patch("event_sink_clickhouse.sinks.external_id_sink.ModelBaseSink.get_queryset")
def test_get_queryset(mock_get_queryset):
"""
Test the get_queryset method.
"""
sink = UserProfileSink(None, None)

sink.get_queryset()

mock_get_queryset.assert_called_once_with(None)
mock_get_queryset.return_value.select_related.assert_called_once_with("user")

0 comments on commit a43189d

Please sign in to comment.