From 27d4966c314b502ea9f2098335f4470f58a32e66 Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Mon, 17 Jun 2024 13:09:04 -0400 Subject: [PATCH 1/6] adding params to fetch published courses --- learning_resources/etl/mitxonline.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/learning_resources/etl/mitxonline.py b/learning_resources/etl/mitxonline.py index 0a1a6ca697..69ef44deec 100644 --- a/learning_resources/etl/mitxonline.py +++ b/learning_resources/etl/mitxonline.py @@ -90,7 +90,15 @@ def parse_page_attribute( def extract_programs(): """Loads the MITx Online catalog data""" # noqa: D401 if settings.MITX_ONLINE_PROGRAMS_API_URL: - return list(_fetch_data(settings.MITX_ONLINE_PROGRAMS_API_URL)) + return list( + _fetch_data( + settings.MITX_ONLINE_PROGRAMS_API_URL, + params={ + "courserun_is_enrollable": True, + "live": True, + }, + ) + ) else: log.warning("Missing required setting MITX_ONLINE_PROGRAMS_API_URL") @@ -100,7 +108,15 @@ def extract_programs(): def extract_courses(): """Loads the MITx Online catalog data""" # noqa: D401 if settings.MITX_ONLINE_COURSES_API_URL: - return list(_fetch_data(settings.MITX_ONLINE_COURSES_API_URL)) + return list( + _fetch_data( + settings.MITX_ONLINE_COURSES_API_URL, + params={ + "courserun_is_enrollable": True, + "live": True, + }, + ) + ) else: log.warning("Missing required setting MITX_ONLINE_COURSES_API_URL") @@ -244,7 +260,11 @@ def _fetch_courses_by_ids(course_ids): return list( _fetch_data( settings.MITX_ONLINE_COURSES_API_URL, - params={"id": ",".join([str(courseid) for courseid in course_ids])}, + params={ + "id": ",".join([str(courseid) for courseid in course_ids]), + "courserun_is_enrollable": True, + "live": True, + }, ) ) From 93846b321c54bf7c644b79a9b564893b204d9731 Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Mon, 17 Jun 2024 13:39:52 -0400 Subject: [PATCH 2/6] pulling in archived courses --- learning_resources/etl/mitxonline.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/learning_resources/etl/mitxonline.py b/learning_resources/etl/mitxonline.py index 69ef44deec..5c747fd9f8 100644 --- a/learning_resources/etl/mitxonline.py +++ b/learning_resources/etl/mitxonline.py @@ -94,7 +94,7 @@ def extract_programs(): _fetch_data( settings.MITX_ONLINE_PROGRAMS_API_URL, params={ - "courserun_is_enrollable": True, + "page__live": True, "live": True, }, ) @@ -112,7 +112,7 @@ def extract_courses(): _fetch_data( settings.MITX_ONLINE_COURSES_API_URL, params={ - "courserun_is_enrollable": True, + "page__live": True, "live": True, }, ) @@ -262,7 +262,7 @@ def _fetch_courses_by_ids(course_ids): settings.MITX_ONLINE_COURSES_API_URL, params={ "id": ",".join([str(courseid) for courseid in course_ids]), - "courserun_is_enrollable": True, + "page__live": True, "live": True, }, ) From 3999206a66f9751991da9e7278688e31c6573b02 Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Tue, 18 Jun 2024 11:20:51 -0400 Subject: [PATCH 3/6] adding check for the course being live --- learning_resources/etl/mitxonline.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/learning_resources/etl/mitxonline.py b/learning_resources/etl/mitxonline.py index 5c747fd9f8..e25ec6a283 100644 --- a/learning_resources/etl/mitxonline.py +++ b/learning_resources/etl/mitxonline.py @@ -226,6 +226,8 @@ def _transform_course(course): }, "published": bool( parse_page_attribute(course, "page_url") + and parse_page_attribute(course, "live") + and course.get("live", False) ), # a course is only considered published if it has a page url "professional": False, "certification": has_certification, From 9dee8af37be257f8bbbf38712f6ce49ce4d93f24 Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Tue, 18 Jun 2024 11:33:23 -0400 Subject: [PATCH 4/6] adding pruning of resources --- learning_resources/etl/loaders.py | 18 +++++++++++------- learning_resources/etl/pipelines.py | 2 +- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/learning_resources/etl/loaders.py b/learning_resources/etl/loaders.py index 0f350a1e61..59f3c6675f 100644 --- a/learning_resources/etl/loaders.py +++ b/learning_resources/etl/loaders.py @@ -516,14 +516,18 @@ def load_programs( blocklist = load_course_blocklist() duplicates = load_course_duplicates(etl_source) - return [ - program - for program in [ - load_program(program_data, blocklist, duplicates, config=config) - for program_data in programs_data - ] - if program is not None + programs = [ + load_program(program_data, blocklist, duplicates, config=config) + for program_data in programs_data ] + if programs and config.prune: + for learning_resource in LearningResource.objects.filter( + etl_source=etl_source, resource_type=LearningResourceType.program.name + ).exclude(id__in=[learning_resource.id for learning_resource in programs]): + learning_resource.published = False + learning_resource.save() + resource_unpublished_actions(learning_resource) + return [program for program in programs if program is not None] def load_content_file( diff --git a/learning_resources/etl/pipelines.py b/learning_resources/etl/pipelines.py index 90db29f0f6..7ccf48d062 100644 --- a/learning_resources/etl/pipelines.py +++ b/learning_resources/etl/pipelines.py @@ -53,7 +53,7 @@ mitxonline_programs_etl = compose( load_programs( ETLSource.mitxonline.name, - config=ProgramLoaderConfig(courses=CourseLoaderConfig(prune=True)), + config=ProgramLoaderConfig(courses=CourseLoaderConfig(prune=True), prune=True), ), mitxonline.transform_programs, mitxonline.extract_programs, From c6f76f2722013fc0bddf97ec311e83ab2cbc481e Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Tue, 18 Jun 2024 12:08:10 -0400 Subject: [PATCH 5/6] fixing test --- learning_resources/etl/loaders.py | 8 +++++++- learning_resources/etl/loaders_test.py | 9 ++++++--- learning_resources/etl/pipelines_test.py | 2 +- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/learning_resources/etl/loaders.py b/learning_resources/etl/loaders.py index 59f3c6675f..6cafffa2bc 100644 --- a/learning_resources/etl/loaders.py +++ b/learning_resources/etl/loaders.py @@ -523,7 +523,13 @@ def load_programs( if programs and config.prune: for learning_resource in LearningResource.objects.filter( etl_source=etl_source, resource_type=LearningResourceType.program.name - ).exclude(id__in=[learning_resource.id for learning_resource in programs]): + ).exclude( + id__in=[ + learning_resource.id + for learning_resource in programs + if learning_resource is not None + ] + ): learning_resource.published = False learning_resource.save() resource_unpublished_actions(learning_resource) diff --git a/learning_resources/etl/loaders_test.py b/learning_resources/etl/loaders_test.py index e4472ef2ec..d33cd3a9bc 100644 --- a/learning_resources/etl/loaders_test.py +++ b/learning_resources/etl/loaders_test.py @@ -743,11 +743,14 @@ def test_load_courses(mocker, mock_blocklist, mock_duplicates, prune): def test_load_programs(mocker, mock_blocklist, mock_duplicates): """Test that load_programs calls the expected functions""" - program_data = [{"courses": [{"platform": "a"}, {}]}] + program_data = [{"courses": [{"platform": "a"}, {}], "id": 5}] + mock_load_program = mocker.patch( - "learning_resources.etl.loaders.load_program", autospec=True + "learning_resources.etl.loaders.load_program", + autospec=True, + return_value=ProgramFactory.create().learning_resource, ) - load_programs("mitx", program_data) + load_programs("mitx", program_data, config=ProgramLoaderConfig(prune=True)) assert mock_load_program.call_count == len(program_data) mock_blocklist.assert_called_once() mock_duplicates.assert_called_once_with("mitx") diff --git a/learning_resources/etl/pipelines_test.py b/learning_resources/etl/pipelines_test.py index fb1525cca3..af94bda19d 100644 --- a/learning_resources/etl/pipelines_test.py +++ b/learning_resources/etl/pipelines_test.py @@ -79,7 +79,7 @@ def test_mitxonline_programs_etl(): mock_load_programs.assert_called_once_with( ETLSource.mitxonline.name, mock_transform.return_value, - config=ProgramLoaderConfig(courses=CourseLoaderConfig(prune=True)), + config=ProgramLoaderConfig(courses=CourseLoaderConfig(prune=True), prune=True), ) assert result == mock_load_programs.return_value From 2d80e8e2ed8d327da74eb5b6ad87ecfff271a411 Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Tue, 18 Jun 2024 12:21:22 -0400 Subject: [PATCH 6/6] test fixes --- learning_resources/etl/mitxonline_test.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/learning_resources/etl/mitxonline_test.py b/learning_resources/etl/mitxonline_test.py index 333bfa582b..d1fc49f321 100644 --- a/learning_resources/etl/mitxonline_test.py +++ b/learning_resources/etl/mitxonline_test.py @@ -194,6 +194,8 @@ def test_mitxonline_transform_programs( ), "published": bool( course_data.get("page", {}).get("page_url", None) + and course_data.get("page", {}).get("live", None) + and course_data.get("live", None) ), "certification": True, "certification_type": CertificationType.completion.name, @@ -303,7 +305,11 @@ def test_mitxonline_transform_courses(settings, mock_mitxonline_courses_data): course_data.get("page", {}).get("description", None) ), "offered_by": OFFERED_BY, - "published": course_data.get("page", {}).get("page_url", None) is not None, + "published": bool( + course_data.get("page", {}).get("page_url", None) + and course_data.get("page", {}).get("live", None) + and course_data.get("live", None) + ), "professional": False, "certification": parse_certification( "mitx",