Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 17 additions & 7 deletions learning_resources/etl/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,14 +516,24 @@ def load_programs(
blocklist = load_course_blocklist()
duplicates = load_course_duplicates(etl_source)

return [
program
for program in [
load_program(program_data, blocklist, duplicates, config=config)
for program_data in programs_data
]
if program is not None
programs = [
load_program(program_data, blocklist, duplicates, config=config)
for program_data in programs_data
]
if programs and config.prune:
for learning_resource in LearningResource.objects.filter(
etl_source=etl_source, resource_type=LearningResourceType.program.name
).exclude(
id__in=[
learning_resource.id
for learning_resource in programs
if learning_resource is not None
]
):
learning_resource.published = False
learning_resource.save()
resource_unpublished_actions(learning_resource)
return [program for program in programs if program is not None]


def load_content_file(
Expand Down
9 changes: 6 additions & 3 deletions learning_resources/etl/loaders_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,11 +743,14 @@ def test_load_courses(mocker, mock_blocklist, mock_duplicates, prune):

def test_load_programs(mocker, mock_blocklist, mock_duplicates):
"""Test that load_programs calls the expected functions"""
program_data = [{"courses": [{"platform": "a"}, {}]}]
program_data = [{"courses": [{"platform": "a"}, {}], "id": 5}]

mock_load_program = mocker.patch(
"learning_resources.etl.loaders.load_program", autospec=True
"learning_resources.etl.loaders.load_program",
autospec=True,
return_value=ProgramFactory.create().learning_resource,
)
load_programs("mitx", program_data)
load_programs("mitx", program_data, config=ProgramLoaderConfig(prune=True))
assert mock_load_program.call_count == len(program_data)
mock_blocklist.assert_called_once()
mock_duplicates.assert_called_once_with("mitx")
Expand Down
28 changes: 25 additions & 3 deletions learning_resources/etl/mitxonline.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,15 @@ def parse_page_attribute(
def extract_programs():
"""Loads the MITx Online catalog data""" # noqa: D401
if settings.MITX_ONLINE_PROGRAMS_API_URL:
return list(_fetch_data(settings.MITX_ONLINE_PROGRAMS_API_URL))
return list(
_fetch_data(
settings.MITX_ONLINE_PROGRAMS_API_URL,
params={
"page__live": True,
"live": True,
},
)
)
else:
log.warning("Missing required setting MITX_ONLINE_PROGRAMS_API_URL")

Expand All @@ -100,7 +108,15 @@ def extract_programs():
def extract_courses():
"""Loads the MITx Online catalog data""" # noqa: D401
if settings.MITX_ONLINE_COURSES_API_URL:
return list(_fetch_data(settings.MITX_ONLINE_COURSES_API_URL))
return list(
_fetch_data(
settings.MITX_ONLINE_COURSES_API_URL,
params={
"page__live": True,
"live": True,
},
)
)
else:
log.warning("Missing required setting MITX_ONLINE_COURSES_API_URL")

Expand Down Expand Up @@ -210,6 +226,8 @@ def _transform_course(course):
},
"published": bool(
parse_page_attribute(course, "page_url")
and parse_page_attribute(course, "live")
and course.get("live", False)
), # a course is only considered published if it has a page url and both its page and the course itself are live
"professional": False,
"certification": has_certification,
Expand Down Expand Up @@ -244,7 +262,11 @@ def _fetch_courses_by_ids(course_ids):
return list(
_fetch_data(
settings.MITX_ONLINE_COURSES_API_URL,
params={"id": ",".join([str(courseid) for courseid in course_ids])},
params={
"id": ",".join([str(courseid) for courseid in course_ids]),
"page__live": True,
"live": True,
},
)
)

Expand Down
8 changes: 7 additions & 1 deletion learning_resources/etl/mitxonline_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,8 @@ def test_mitxonline_transform_programs(
),
"published": bool(
course_data.get("page", {}).get("page_url", None)
and course_data.get("page", {}).get("live", None)
and course_data.get("live", None)
),
"certification": True,
"certification_type": CertificationType.completion.name,
Expand Down Expand Up @@ -303,7 +305,11 @@ def test_mitxonline_transform_courses(settings, mock_mitxonline_courses_data):
course_data.get("page", {}).get("description", None)
),
"offered_by": OFFERED_BY,
"published": course_data.get("page", {}).get("page_url", None) is not None,
"published": bool(
course_data.get("page", {}).get("page_url", None)
and course_data.get("page", {}).get("live", None)
and course_data.get("live", None)
),
"professional": False,
"certification": parse_certification(
"mitx",
Expand Down
2 changes: 1 addition & 1 deletion learning_resources/etl/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
mitxonline_programs_etl = compose(
load_programs(
ETLSource.mitxonline.name,
config=ProgramLoaderConfig(courses=CourseLoaderConfig(prune=True)),
config=ProgramLoaderConfig(courses=CourseLoaderConfig(prune=True), prune=True),
),
mitxonline.transform_programs,
mitxonline.extract_programs,
Expand Down
2 changes: 1 addition & 1 deletion learning_resources/etl/pipelines_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def test_mitxonline_programs_etl():
mock_load_programs.assert_called_once_with(
ETLSource.mitxonline.name,
mock_transform.return_value,
config=ProgramLoaderConfig(courses=CourseLoaderConfig(prune=True)),
config=ProgramLoaderConfig(courses=CourseLoaderConfig(prune=True), prune=True),
)

assert result == mock_load_programs.return_value
Expand Down