diff --git a/learning_resources/etl/loaders.py b/learning_resources/etl/loaders.py index 0f350a1e61..6cafffa2bc 100644 --- a/learning_resources/etl/loaders.py +++ b/learning_resources/etl/loaders.py @@ -516,14 +516,24 @@ def load_programs( blocklist = load_course_blocklist() duplicates = load_course_duplicates(etl_source) - return [ - program - for program in [ - load_program(program_data, blocklist, duplicates, config=config) - for program_data in programs_data - ] - if program is not None + programs = [ + load_program(program_data, blocklist, duplicates, config=config) + for program_data in programs_data ] + if programs and config.prune: + for learning_resource in LearningResource.objects.filter( + etl_source=etl_source, resource_type=LearningResourceType.program.name + ).exclude( + id__in=[ + learning_resource.id + for learning_resource in programs + if learning_resource is not None + ] + ): + learning_resource.published = False + learning_resource.save() + resource_unpublished_actions(learning_resource) + return [program for program in programs if program is not None] def load_content_file( diff --git a/learning_resources/etl/loaders_test.py b/learning_resources/etl/loaders_test.py index e4472ef2ec..d33cd3a9bc 100644 --- a/learning_resources/etl/loaders_test.py +++ b/learning_resources/etl/loaders_test.py @@ -743,11 +743,14 @@ def test_load_courses(mocker, mock_blocklist, mock_duplicates, prune): def test_load_programs(mocker, mock_blocklist, mock_duplicates): """Test that load_programs calls the expected functions""" - program_data = [{"courses": [{"platform": "a"}, {}]}] + program_data = [{"courses": [{"platform": "a"}, {}], "id": 5}] + mock_load_program = mocker.patch( - "learning_resources.etl.loaders.load_program", autospec=True + "learning_resources.etl.loaders.load_program", + autospec=True, + return_value=ProgramFactory.create().learning_resource, ) - load_programs("mitx", program_data) + load_programs("mitx", program_data, config=ProgramLoaderConfig(prune=True)) assert mock_load_program.call_count == len(program_data) mock_blocklist.assert_called_once() mock_duplicates.assert_called_once_with("mitx") diff --git a/learning_resources/etl/mitxonline.py b/learning_resources/etl/mitxonline.py index 0a1a6ca697..e25ec6a283 100644 --- a/learning_resources/etl/mitxonline.py +++ b/learning_resources/etl/mitxonline.py @@ -90,7 +90,15 @@ def parse_page_attribute( def extract_programs(): """Loads the MITx Online catalog data""" # noqa: D401 if settings.MITX_ONLINE_PROGRAMS_API_URL: - return list(_fetch_data(settings.MITX_ONLINE_PROGRAMS_API_URL)) + return list( + _fetch_data( + settings.MITX_ONLINE_PROGRAMS_API_URL, + params={ + "page__live": True, + "live": True, + }, + ) + ) else: log.warning("Missing required setting MITX_ONLINE_PROGRAMS_API_URL") @@ -100,7 +108,15 @@ def extract_programs(): def extract_courses(): """Loads the MITx Online catalog data""" # noqa: D401 if settings.MITX_ONLINE_COURSES_API_URL: - return list(_fetch_data(settings.MITX_ONLINE_COURSES_API_URL)) + return list( + _fetch_data( + settings.MITX_ONLINE_COURSES_API_URL, + params={ + "page__live": True, + "live": True, + }, + ) + ) else: log.warning("Missing required setting MITX_ONLINE_COURSES_API_URL") @@ -210,6 +226,8 @@ def _transform_course(course): }, "published": bool( parse_page_attribute(course, "page_url") + and parse_page_attribute(course, "live") + and course.get("live", False) ), # a course is only considered published if it has a page url "professional": False, "certification": has_certification, @@ -244,7 +262,11 @@ def _fetch_courses_by_ids(course_ids): return list( _fetch_data( settings.MITX_ONLINE_COURSES_API_URL, - params={"id": ",".join([str(courseid) for courseid in course_ids])}, + params={ + "id": ",".join([str(courseid) for courseid in course_ids]), + "page__live": True, + "live": True, + }, ) ) diff --git a/learning_resources/etl/mitxonline_test.py b/learning_resources/etl/mitxonline_test.py index 333bfa582b..d1fc49f321 100644 --- a/learning_resources/etl/mitxonline_test.py +++ b/learning_resources/etl/mitxonline_test.py @@ -194,6 +194,8 @@ def test_mitxonline_transform_programs( ), "published": bool( course_data.get("page", {}).get("page_url", None) + and course_data.get("page", {}).get("live", None) + and course_data.get("live", None) ), "certification": True, "certification_type": CertificationType.completion.name, @@ -303,7 +305,11 @@ def test_mitxonline_transform_courses(settings, mock_mitxonline_courses_data): course_data.get("page", {}).get("description", None) ), "offered_by": OFFERED_BY, - "published": course_data.get("page", {}).get("page_url", None) is not None, + "published": bool( + course_data.get("page", {}).get("page_url", None) + and course_data.get("page", {}).get("live", None) + and course_data.get("live", None) + ), "professional": False, "certification": parse_certification( "mitx", diff --git a/learning_resources/etl/pipelines.py b/learning_resources/etl/pipelines.py index 90db29f0f6..7ccf48d062 100644 --- a/learning_resources/etl/pipelines.py +++ b/learning_resources/etl/pipelines.py @@ -53,7 +53,7 @@ mitxonline_programs_etl = compose( load_programs( ETLSource.mitxonline.name, - config=ProgramLoaderConfig(courses=CourseLoaderConfig(prune=True)), + config=ProgramLoaderConfig(courses=CourseLoaderConfig(prune=True), prune=True), ), mitxonline.transform_programs, mitxonline.extract_programs, diff --git a/learning_resources/etl/pipelines_test.py b/learning_resources/etl/pipelines_test.py index fb1525cca3..af94bda19d 100644 --- a/learning_resources/etl/pipelines_test.py +++ b/learning_resources/etl/pipelines_test.py @@ -79,7 +79,7 @@ def test_mitxonline_programs_etl(): mock_load_programs.assert_called_once_with( ETLSource.mitxonline.name, mock_transform.return_value, - config=ProgramLoaderConfig(courses=CourseLoaderConfig(prune=True)), + config=ProgramLoaderConfig(courses=CourseLoaderConfig(prune=True), prune=True), ) assert result == mock_load_programs.return_value