From f9bf4f4c44c0d51169713acae6b4488795d757ad Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Thu, 12 Oct 2023 14:23:27 -0400 Subject: [PATCH 01/11] Fix readable_id and etl_source for OCW courses --- learning_resources/etl/ocw.py | 10 +++- learning_resources/etl/ocw_test.py | 5 +- .../0020_refactor_ocw_readable_id.py | 57 +++++++++++++++++++ 3 files changed, 68 insertions(+), 4 deletions(-) create mode 100644 learning_resources/migrations/0020_refactor_ocw_readable_id.py diff --git a/learning_resources/etl/ocw.py b/learning_resources/etl/ocw.py index f159bba930..2ec05a9ef6 100644 --- a/learning_resources/etl/ocw.py +++ b/learning_resources/etl/ocw.py @@ -37,6 +37,7 @@ OFFERED_BY = {"name": OfferedBy.ocw.value} PRIMARY_COURSE_ID = "primary_course_number" +ETL_SOURCE = "ocw" def transform_content_files( @@ -276,14 +277,16 @@ def transform_course(course_data: dict) -> dict: else: uid = uid.replace("-", "") course_data["run_id"] = uid + course_id = f"{course_data.get(PRIMARY_COURSE_ID)}" + readable_id = f"{course_id}+{course_data.get('term')}_{course_data.get('year')}" extra_course_numbers = course_data.get("extra_course_numbers", None) if extra_course_numbers: extra_course_numbers = [num.strip() for num in extra_course_numbers.split(",")] else: extra_course_numbers = [] + extra_course_numbers.insert(0, course_id) - course_id = f"{course_data.get(PRIMARY_COURSE_ID)}" topics = [ {"name": topic_name} for topic_name in list( @@ -297,7 +300,9 @@ def transform_course(course_data: dict) -> dict: image_src = course_data.get("image_src") return { - "readable_id": course_id, + "readable_id": readable_id, + "etl_source": ETL_SOURCE, + "offered_by": copy.deepcopy(OFFERED_BY), "platform": PlatformType.ocw.value, "title": course_data["course_title"], "departments": course_data.get("department_numbers", []), @@ -311,7 +316,6 @@ def transform_course(course_data: dict) -> dict: .get("image_metadata", {}) .get("image-alt"), }, - "offered_by": copy.deepcopy(OFFERED_BY), "description": course_data["course_description"], "url": course_data.get("url"), "last_modified": course_data.get("last_modified"), diff --git a/learning_resources/etl/ocw_test.py b/learning_resources/etl/ocw_test.py index 4d0c0174ea..42c7c584f0 100644 --- a/learning_resources/etl/ocw_test.py +++ b/learning_resources/etl/ocw_test.py @@ -10,6 +10,7 @@ from learning_resources.conftest import OCW_TEST_PREFIX, setup_s3_ocw from learning_resources.etl.ocw import ( + ETL_SOURCE, transform_content_files, transform_contentfile, transform_course, @@ -185,6 +186,8 @@ def test_transform_course(settings, legacy_uid, site_uid, expected_uid, has_extr } transformed_json = transform_course(extracted_json) if expected_uid: + assert transformed_json["readable_id"] == "16.01+Fall_2005" + assert transformed_json["etl_source"] == ETL_SOURCE assert transformed_json["runs"][0]["run_id"] == expected_uid assert transformed_json["image"]["url"] == ( "http://test.edu/courses/16-01-unified-engineering-i-ii-iii-iv-fall-2005-spring-2006/8f56bbb35d0e456dc8b70911bec7cd0d_16-01f05.jpg" @@ -193,7 +196,7 @@ def test_transform_course(settings, legacy_uid, site_uid, expected_uid, has_extr "Illustration of an aircraft wing showing connections between the disciplines of the course." ) assert transformed_json["course"]["extra_course_numbers"] == ( - ["1", "2"] if has_extra_num else [] + ["16.01", "1", "2"] if has_extra_num else ["16.01"] ) else: assert transformed_json is None diff --git a/learning_resources/migrations/0020_refactor_ocw_readable_id.py b/learning_resources/migrations/0020_refactor_ocw_readable_id.py new file mode 100644 index 0000000000..0c79a40831 --- /dev/null +++ b/learning_resources/migrations/0020_refactor_ocw_readable_id.py @@ -0,0 +1,57 @@ +# Generated by Django 4.1.10 on 2023-10-02 16:21 + +from django.db import migrations + +from learning_resources.constants import PlatformType +from learning_resources.etl import ocw + + +def update_ocw_readable_id(apps, schema_editor): + """ + Update readable_id and course.extra_course_numbers for existing + OCW learning resources + """ + LearningResource = apps.get_model("learning_resources", "LearningResource") + for resource in ( + LearningResource.objects.filter(platform__platform=PlatformType.ocw.value) + .select_related("course") + .prefetch_related("runs") + ): + course = resource.course + course.extra_course_numbers = [course.learning_resource.readable_id] + ( + course.extra_course_numbers or [] + ) + course.save() + resource.etl_source = ocw.ETL_SOURCE + run = resource.runs.first() + resource.readable_id = f"{resource.readable_id}+{run.semester}_{run.year}" + resource.save() + + +def revert_ocw_readable_id(apps, schema_editor): + """ + Revert readable_id and course.extra_course_numbers for existing + OCW learning resources + """ + LearningResource = apps.get_model("learning_resources", "LearningResource") + for resource in LearningResource.objects.filter( + platform__platform=PlatformType.ocw.value + ).select_related("course"): + resource.readable_id = resource.readable_id.split("+")[0] + resource.save() + course = resource.course + course.extra_course_numbers = ( + None + if course.extra_course_numbers == [resource.readable_id] + else course.extra_course_numbers[1:] + ) + course.save() + + +class Migration(migrations.Migration): + dependencies = [ + ("learning_resources", "0019_departments"), + ] + operations = [ + migrations.RunPython(update_ocw_readable_id, revert_ocw_readable_id), + ] From 3cd25c28499527bc75846b863e0c55f05335d012 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Thu, 12 Oct 2023 14:35:35 -0400 Subject: [PATCH 02/11] include runs in LearningResource admin --- learning_resources/admin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learning_resources/admin.py b/learning_resources/admin.py index f6ec960154..896ada4231 100644 --- a/learning_resources/admin.py +++ b/learning_resources/admin.py @@ -104,7 +104,7 @@ class LearningResourceAdmin(admin.ModelAdmin): "published", ) list_filter = ("platform", "offered_by", "resource_type", "published") - inlines = [CourseInline, LearningPathInline] + inlines = [CourseInline, LearningPathInline, LearningResourceRunInline] autocomplete_fields = ("topics",) From f7d5726d9a183ed8cf49d3f7c270f83e320cef73 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Thu, 12 Oct 2023 14:58:44 -0400 Subject: [PATCH 03/11] Adjust migration --- learning_resources/migrations/0020_refactor_ocw_readable_id.py | 1 + 1 file changed, 1 insertion(+) diff --git a/learning_resources/migrations/0020_refactor_ocw_readable_id.py b/learning_resources/migrations/0020_refactor_ocw_readable_id.py index 0c79a40831..841df81338 100644 --- a/learning_resources/migrations/0020_refactor_ocw_readable_id.py +++ b/learning_resources/migrations/0020_refactor_ocw_readable_id.py @@ -25,6 +25,7 @@ def update_ocw_readable_id(apps, schema_editor): resource.etl_source = ocw.ETL_SOURCE run = resource.runs.first() resource.readable_id = f"{resource.readable_id}+{run.semester}_{run.year}" + resource.runs.exclude(pk=run.pk).delete() resource.save() From 4b7e2eb0ab51506b7409d27d988c350e4aca029d Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Thu, 12 Oct 2023 15:25:10 -0400 Subject: [PATCH 04/11] Fix tests --- learning_resources/etl/pipelines_test.py | 70 +------------------ .../0020_refactor_ocw_readable_id.py | 2 +- learning_resources/tasks_test.py | 2 +- 3 files changed, 4 insertions(+), 70 deletions(-) diff --git a/learning_resources/etl/pipelines_test.py b/learning_resources/etl/pipelines_test.py index 92df2c2c96..c3cd0c5f96 100644 --- a/learning_resources/etl/pipelines_test.py +++ b/learning_resources/etl/pipelines_test.py @@ -14,72 +14,6 @@ from learning_resources.models import LearningResource -@pytest.fixture() -def ocw_valid_data(): - """ - Return valid ocw data - """ - return { - "course_title": "Unified Engineering I, II, III, \u0026 IV", - "course_description": "The basic objective of Unified Engineering is to give a solid understanding of the fundamental disciplines of aerospace engineering, as well as their interrelationships and applications. These disciplines are Materials and Structures (M); Computers and Programming (C); Fluid Mechanics (F); Thermodynamics (T); Propulsion (P); and Signals and Systems (S). In choosing to teach these subjects in a unified manner, the instructors seek to explain the common intellectual threads in these disciplines, as well as their combined application to solve engineering Systems Problems (SP). Throughout the year, the instructors emphasize the connections among the disciplines", - "site_uid": None, - "legacy_uid": "97db384e-f340-09a6-4df7-cb86cf701979", - "instructors": [ - { - "first_name": "Mark", - "last_name": "Drela", - "middle_initial": "", - "salutation": "Prof.", - "title": "Prof. Mark Drela", - }, - { - "first_name": "Steven", - "last_name": "Hall", - "middle_initial": "", - "salutation": "Prof.", - "title": "Prof. Steven Hall", - }, - ], - "department_numbers": ["16"], - "learning_resource_types": [ - "Lecture Videos", - "Course Introduction", - "Competition Videos", - "Problem Sets with Solutions", - "Exams with Solutions", - ], - "topics": [ - ["Engineering", "Aerospace Engineering", "Materials Selection"], - ["Engineering", "Aerospace Engineering", "Propulsion Systems"], - ["Science", "Physics", "Thermodynamics"], - ["Engineering", "Mechanical Engineering", "Fluid Mechanics"], - ["Engineering", "Aerospace Engineering"], - ["Business", "Project Management"], - ], - "primary_course_number": "16.01", - "extra_course_numbers": "16.02, 16.03, 16.04, 17.01", - "term": "Fall", - "year": "2005", - "level": ["Undergraduate"], - "image_src": "https://open-learning-course-data-production.s3.amazonaws.com/16-01-unified-engineering-i-ii-iii-iv-fall-2005-spring-2006/8f56bbb35d0e456dc8b70911bec7cd0d_16-01f05.jpg", - "course_image_metadata": { - "description": "An abstracted aircraft wing with illustrated systems. (Image by MIT OCW.)", - "draft": False, - "file": "https://open-learning-course-data-production.s3.amazonaws.com/16-01-unified-engineering-i-ii-iii-iv-fall-2005-spring-2006/8f56bbb35d0e456dc8b70911bec7cd0d_16-01f05.jpg", - "file_type": "image/jpeg", - "image_metadata": { - "caption": "An abstracted aircraft wing, illustrating the connections between the disciplines of Unified Engineering. (Image by MIT OpenCourseWare.)", - "credit": "", - "image-alt": "Illustration of an aircraft wing showing connections between the disciplines of the course.", - }, - "iscjklanguage": False, - "resourcetype": "Image", - "title": "16-01f05.jpg", - "uid": "8f56bbb3-5d0e-456d-c8b7-0911bec7cd0d", - }, - } - - @contextmanager def reload_mocked_pipeline(*patchers): """Create a context that is rolled back after executing the pipeline""" @@ -172,8 +106,8 @@ def test_ocw_courses_etl(settings, mocker): ) resource = LearningResource.objects.first() - assert resource.readable_id == "16.01" - assert resource.course.extra_course_numbers == ["16.02", "16.03", "16.04"] + assert resource.readable_id == "16.01+Fall_2005" + assert resource.course.extra_course_numbers == ["16.01", "16.02", "16.03", "16.04"] assert resource.platform.platform == PlatformType.ocw.value assert resource.offered_by.name == OfferedBy.ocw.value assert resource.departments.first().department_id == "16" diff --git a/learning_resources/migrations/0020_refactor_ocw_readable_id.py b/learning_resources/migrations/0020_refactor_ocw_readable_id.py index 841df81338..51c491f4cf 100644 --- a/learning_resources/migrations/0020_refactor_ocw_readable_id.py +++ b/learning_resources/migrations/0020_refactor_ocw_readable_id.py @@ -1,4 +1,4 @@ -# Generated by Django 4.1.10 on 2023-10-02 16:21 +# Generated manually to convert the readable_id for OCW learning resources from django.db import migrations diff --git a/learning_resources/tasks_test.py b/learning_resources/tasks_test.py index 08c0e0fd2a..a58d83ac51 100644 --- a/learning_resources/tasks_test.py +++ b/learning_resources/tasks_test.py @@ -199,7 +199,7 @@ def test_get_ocw_courses(settings, mocker, mocked_celery, timestamp, overwrite): course_resource = models.Course.objects.first().learning_resource assert course_resource.title == "Unified Engineering I, II, III, & IV" - assert course_resource.readable_id == "16.01" + assert course_resource.readable_id == "16.01+Fall_2005" assert course_resource.runs.count() == 1 assert course_resource.runs.first().run_id == "97db384ef34009a64df7cb86cf701979" assert ( From 0e0bf5c46bb47e780f3e60f9d1bb95da189c5c88 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Thu, 12 Oct 2023 15:44:25 -0400 Subject: [PATCH 05/11] Slugify semester part of ocw readable_id because sometimes the value is 'January IAP' --- learning_resources/etl/ocw.py | 5 ++++- learning_resources/etl/ocw_test.py | 2 +- learning_resources/etl/pipelines_test.py | 2 +- .../migrations/0020_refactor_ocw_readable_id.py | 5 ++++- learning_resources/tasks_test.py | 2 +- 5 files changed, 11 insertions(+), 5 deletions(-) diff --git a/learning_resources/etl/ocw.py b/learning_resources/etl/ocw.py index 2ec05a9ef6..3a2e461803 100644 --- a/learning_resources/etl/ocw.py +++ b/learning_resources/etl/ocw.py @@ -10,6 +10,7 @@ import boto3 from botocore.exceptions import ClientError from django.conf import settings +from django.utils.text import slugify from requests import ReadTimeout from retry import retry @@ -278,7 +279,9 @@ def transform_course(course_data: dict) -> dict: uid = uid.replace("-", "") course_data["run_id"] = uid course_id = f"{course_data.get(PRIMARY_COURSE_ID)}" - readable_id = f"{course_id}+{course_data.get('term')}_{course_data.get('year')}" + readable_id = ( + f"{course_id}+{slugify(course_data.get('term'))}_{course_data.get('year')}" + ) extra_course_numbers = course_data.get("extra_course_numbers", None) if extra_course_numbers: diff --git a/learning_resources/etl/ocw_test.py b/learning_resources/etl/ocw_test.py index 42c7c584f0..4ecf10cb66 100644 --- a/learning_resources/etl/ocw_test.py +++ b/learning_resources/etl/ocw_test.py @@ -186,7 +186,7 @@ def test_transform_course(settings, legacy_uid, site_uid, expected_uid, has_extr } transformed_json = transform_course(extracted_json) if expected_uid: - assert transformed_json["readable_id"] == "16.01+Fall_2005" + assert transformed_json["readable_id"] == "16.01+fall_2005" assert transformed_json["etl_source"] == ETL_SOURCE assert transformed_json["runs"][0]["run_id"] == expected_uid assert transformed_json["image"]["url"] == ( diff --git a/learning_resources/etl/pipelines_test.py b/learning_resources/etl/pipelines_test.py index c3cd0c5f96..05d40eb9a4 100644 --- a/learning_resources/etl/pipelines_test.py +++ b/learning_resources/etl/pipelines_test.py @@ -106,7 +106,7 @@ def test_ocw_courses_etl(settings, mocker): ) resource = LearningResource.objects.first() - assert resource.readable_id == "16.01+Fall_2005" + assert resource.readable_id == "16.01+fall_2005" assert resource.course.extra_course_numbers == ["16.01", "16.02", "16.03", "16.04"] assert resource.platform.platform == PlatformType.ocw.value assert resource.offered_by.name == OfferedBy.ocw.value diff --git a/learning_resources/migrations/0020_refactor_ocw_readable_id.py b/learning_resources/migrations/0020_refactor_ocw_readable_id.py index 51c491f4cf..0108641e57 100644 --- a/learning_resources/migrations/0020_refactor_ocw_readable_id.py +++ b/learning_resources/migrations/0020_refactor_ocw_readable_id.py @@ -1,6 +1,7 @@ # Generated manually to convert the readable_id for OCW learning resources from django.db import migrations +from django.utils.text import slugify from learning_resources.constants import PlatformType from learning_resources.etl import ocw @@ -24,7 +25,9 @@ def update_ocw_readable_id(apps, schema_editor): course.save() resource.etl_source = ocw.ETL_SOURCE run = resource.runs.first() - resource.readable_id = f"{resource.readable_id}+{run.semester}_{run.year}" + resource.readable_id = ( + f"{resource.readable_id}+{slugify(run.semester)}_{run.year}" + ) resource.runs.exclude(pk=run.pk).delete() resource.save() diff --git a/learning_resources/tasks_test.py b/learning_resources/tasks_test.py index a58d83ac51..764ad39101 100644 --- a/learning_resources/tasks_test.py +++ b/learning_resources/tasks_test.py @@ -199,7 +199,7 @@ def test_get_ocw_courses(settings, mocker, mocked_celery, timestamp, overwrite): course_resource = models.Course.objects.first().learning_resource assert course_resource.title == "Unified Engineering I, II, III, & IV" - assert course_resource.readable_id == "16.01+Fall_2005" + assert course_resource.readable_id == "16.01+fall_2005" assert course_resource.runs.count() == 1 assert course_resource.runs.first().run_id == "97db384ef34009a64df7cb86cf701979" assert ( From 8fc0635a3047cae0a9793a48c68ed2c14a15565b Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Fri, 13 Oct 2023 15:16:48 -0400 Subject: [PATCH 06/11] Leave extra_course_numbers as is for OCW, deal with it later --- learning_resources/etl/ocw.py | 9 ++++----- learning_resources/etl/ocw_test.py | 2 +- learning_resources/etl/pipelines_test.py | 2 +- .../0020_refactor_ocw_readable_id.py | 20 +++---------------- 4 files changed, 9 insertions(+), 24 deletions(-) diff --git a/learning_resources/etl/ocw.py b/learning_resources/etl/ocw.py index 3a2e461803..58ef10cca8 100644 --- a/learning_resources/etl/ocw.py +++ b/learning_resources/etl/ocw.py @@ -278,17 +278,16 @@ def transform_course(course_data: dict) -> dict: else: uid = uid.replace("-", "") course_data["run_id"] = uid - course_id = f"{course_data.get(PRIMARY_COURSE_ID)}" - readable_id = ( - f"{course_id}+{slugify(course_data.get('term'))}_{course_data.get('year')}" - ) + extra_course_numbers = course_data.get("extra_course_numbers", None) if extra_course_numbers: extra_course_numbers = [num.strip() for num in extra_course_numbers.split(",")] else: extra_course_numbers = [] - extra_course_numbers.insert(0, course_id) + + readable_id = f"{course_data.get(PRIMARY_COURSE_ID)}+\ + {slugify(course_data.get('term'))}_{course_data.get('year')}" topics = [ {"name": topic_name} diff --git a/learning_resources/etl/ocw_test.py b/learning_resources/etl/ocw_test.py index 4ecf10cb66..cb915dc82e 100644 --- a/learning_resources/etl/ocw_test.py +++ b/learning_resources/etl/ocw_test.py @@ -196,7 +196,7 @@ def test_transform_course(settings, legacy_uid, site_uid, expected_uid, has_extr "Illustration of an aircraft wing showing connections between the disciplines of the course." ) assert transformed_json["course"]["extra_course_numbers"] == ( - ["16.01", "1", "2"] if has_extra_num else ["16.01"] + ["1", "2"] if has_extra_num else [] ) else: assert transformed_json is None diff --git a/learning_resources/etl/pipelines_test.py b/learning_resources/etl/pipelines_test.py index 05d40eb9a4..9cc3cd7ad9 100644 --- a/learning_resources/etl/pipelines_test.py +++ b/learning_resources/etl/pipelines_test.py @@ -107,7 +107,7 @@ def test_ocw_courses_etl(settings, mocker): resource = LearningResource.objects.first() assert resource.readable_id == "16.01+fall_2005" - assert resource.course.extra_course_numbers == ["16.01", "16.02", "16.03", "16.04"] + assert resource.course.extra_course_numbers == ["16.02", "16.03", "16.04"] assert resource.platform.platform == PlatformType.ocw.value assert resource.offered_by.name == OfferedBy.ocw.value assert resource.departments.first().department_id == "16" diff --git a/learning_resources/migrations/0020_refactor_ocw_readable_id.py b/learning_resources/migrations/0020_refactor_ocw_readable_id.py index 0108641e57..379f111426 100644 --- a/learning_resources/migrations/0020_refactor_ocw_readable_id.py +++ b/learning_resources/migrations/0020_refactor_ocw_readable_id.py @@ -13,16 +13,9 @@ def update_ocw_readable_id(apps, schema_editor): OCW learning resources """ LearningResource = apps.get_model("learning_resources", "LearningResource") - for resource in ( - LearningResource.objects.filter(platform__platform=PlatformType.ocw.value) - .select_related("course") - .prefetch_related("runs") - ): - course = resource.course - course.extra_course_numbers = [course.learning_resource.readable_id] + ( - course.extra_course_numbers or [] - ) - course.save() + for resource in LearningResource.objects.filter( + platform__platform=PlatformType.ocw.value + ).prefetch_related("runs"): resource.etl_source = ocw.ETL_SOURCE run = resource.runs.first() resource.readable_id = ( @@ -43,13 +36,6 @@ def revert_ocw_readable_id(apps, schema_editor): ).select_related("course"): resource.readable_id = resource.readable_id.split("+")[0] resource.save() - course = resource.course - course.extra_course_numbers = ( - None - if course.extra_course_numbers == [resource.readable_id] - else course.extra_course_numbers[1:] - ) - course.save() class Migration(migrations.Migration): From a2c6c7991a7dfd453955b92d432f369ae2983324 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Fri, 13 Oct 2023 15:25:44 -0400 Subject: [PATCH 07/11] Leave extra_course_numbers as is for OCW, deal with it later --- learning_resources/etl/ocw.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/learning_resources/etl/ocw.py b/learning_resources/etl/ocw.py index 58ef10cca8..e7abc34b52 100644 --- a/learning_resources/etl/ocw.py +++ b/learning_resources/etl/ocw.py @@ -286,9 +286,7 @@ def transform_course(course_data: dict) -> dict: else: extra_course_numbers = [] - readable_id = f"{course_data.get(PRIMARY_COURSE_ID)}+\ - {slugify(course_data.get('term'))}_{course_data.get('year')}" - + readable_id = f"{course_data.get(PRIMARY_COURSE_ID)}+{slugify(course_data.get('term'))}_{course_data.get('year')}" # noqa: E501 topics = [ {"name": topic_name} for topic_name in list( From c7f92ab8731be199b971d8cf3e2853947c77fd11 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Fri, 13 Oct 2023 16:39:28 -0400 Subject: [PATCH 08/11] Remove some course_catalog tests --- course_catalog/etl/pipelines_test.py | 110 --------------------------- 1 file changed, 110 deletions(-) diff --git a/course_catalog/etl/pipelines_test.py b/course_catalog/etl/pipelines_test.py index 655810b8da..47f27a0629 100644 --- a/course_catalog/etl/pipelines_test.py +++ b/course_catalog/etl/pipelines_test.py @@ -66,98 +66,6 @@ def test_micromasters_etl(): assert result == mock_load_programs.return_value -def test_xpro_programs_etl(): - """Verify that xpro programs etl pipeline executes correctly""" - with reload_mocked_pipeline( - patch("course_catalog.etl.xpro.extract_programs", autospec=True), - patch("course_catalog.etl.xpro.transform_programs", autospec=True), - patch("course_catalog.etl.loaders.load_programs", autospec=True), - ) as patches: - mock_extract, mock_transform, mock_load_programs = patches - result = pipelines.xpro_programs_etl() - - mock_extract.assert_called_once_with() - mock_transform.assert_called_once_with(mock_extract.return_value) - mock_load_programs.assert_called_once_with( - PlatformType.xpro.value, mock_transform.return_value - ) - - assert result == mock_load_programs.return_value - - -def test_xpro_courses_etl(): - """Verify that xpro courses etl pipeline executes correctly""" - with reload_mocked_pipeline( - patch("course_catalog.etl.xpro.extract_courses", autospec=True), - patch("course_catalog.etl.xpro.transform_courses", autospec=True), - patch("course_catalog.etl.loaders.load_courses", autospec=True), - ) as patches: - mock_extract, mock_transform, mock_load_courses = patches - result = pipelines.xpro_courses_etl() - - mock_extract.assert_called_once_with() - mock_transform.assert_called_once_with(mock_extract.return_value) - mock_load_courses.assert_called_once_with( - PlatformType.xpro.value, - mock_transform.return_value, - config=CourseLoaderConfig(prune=True), - ) - - assert result == mock_load_courses.return_value - - -def test_mitx_etl(): - """Verify that mitx etl pipeline executes correctly""" - with reload_mocked_pipeline( - patch("course_catalog.etl.mitx.extract", autospec=True), - patch("course_catalog.etl.mitx.transform", autospec=False), - patch("course_catalog.etl.loaders.load_courses", autospec=True), - ) as patches: - mock_extract, mock_transform, mock_load_courses = patches - result = pipelines.mitx_etl() - - mock_extract.assert_called_once_with() - - # each of these should be called with the return value of the extract - mock_transform.assert_called_once_with(mock_extract.return_value) - - # load_courses should be called *only* with the return value of transform - mock_load_courses.assert_called_once_with( - PlatformType.mitx.value, - mock_transform.return_value, - config=CourseLoaderConfig( - prune=True, - offered_by=OfferedByLoaderConfig(additive=True), - runs=LearningResourceRunLoaderConfig( - offered_by=OfferedByLoaderConfig(additive=True) - ), - ), - ) - - assert result == mock_load_courses.return_value - - -def test_oll_etl(): - """Verify that OLL etl pipeline executes correctly""" - with reload_mocked_pipeline( - patch("course_catalog.etl.oll.extract", autospec=True), - patch("course_catalog.etl.oll.transform", autospec=False), - patch("course_catalog.etl.loaders.load_courses", autospec=True), - ) as patches: - mock_extract, mock_transform, mock_load_courses = patches - result = pipelines.oll_etl() - - mock_extract.assert_called_once_with() - mock_transform.assert_called_once_with(mock_extract.return_value) - mock_load_courses.assert_called_once_with( - PlatformType.oll.value, - mock_transform.return_value, - config=CourseLoaderConfig(prune=True), - ) - - assert result == mock_load_courses.return_value - - def test_youtube_etl(): """Verify that youtube etl pipeline executes correctly""" with reload_mocked_pipeline( @@ -175,24 +83,6 @@ def test_youtube_etl(): assert result == mock_load_video_channels.return_value -def test_podcast_etl(): - """Verify that podcast etl pipeline executes correctly""" - - with reload_mocked_pipeline( - patch("course_catalog.etl.podcast.extract", autospec=True), - patch("course_catalog.etl.podcast.transform", autospec=True), - patch("course_catalog.etl.loaders.load_podcasts", autospec=True), - ) as patches: - mock_extract, mock_transform, mock_load_podcasts = patches - result = pipelines.podcast_etl() - - mock_extract.assert_called_once_with() - mock_transform.assert_called_once_with(mock_extract.return_value) - mock_load_podcasts.assert_called_once_with(mock_transform.return_value) - - assert result == mock_load_podcasts.return_value - - @pytest.mark.django_db() def test_prolearn_programs_etl(): """ From 0b9fc708161937a15ee828f307997da0611e2034 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Mon, 16 Oct 2023 10:35:30 -0400 Subject: [PATCH 09/11] Pick run with url that matches resource url --- learning_resources/migrations/0020_refactor_ocw_readable_id.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learning_resources/migrations/0020_refactor_ocw_readable_id.py b/learning_resources/migrations/0020_refactor_ocw_readable_id.py index 379f111426..24761f06b3 100644 --- a/learning_resources/migrations/0020_refactor_ocw_readable_id.py +++ b/learning_resources/migrations/0020_refactor_ocw_readable_id.py @@ -17,7 +17,7 @@ def update_ocw_readable_id(apps, schema_editor): platform__platform=PlatformType.ocw.value ).prefetch_related("runs"): resource.etl_source = ocw.ETL_SOURCE - run = resource.runs.first() + run = resource.runs.get(url=resource.url) resource.readable_id = ( f"{resource.readable_id}+{slugify(run.semester)}_{run.year}" ) From 3ea379c2ba17dc7a6fbee4d446d4e39926da672a Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Mon, 16 Oct 2023 10:47:19 -0400 Subject: [PATCH 10/11] Just in case no matching run exists --- .../migrations/0020_refactor_ocw_readable_id.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/learning_resources/migrations/0020_refactor_ocw_readable_id.py b/learning_resources/migrations/0020_refactor_ocw_readable_id.py index 24761f06b3..12f0eb1dca 100644 --- a/learning_resources/migrations/0020_refactor_ocw_readable_id.py +++ b/learning_resources/migrations/0020_refactor_ocw_readable_id.py @@ -17,12 +17,13 @@ def update_ocw_readable_id(apps, schema_editor): platform__platform=PlatformType.ocw.value ).prefetch_related("runs"): resource.etl_source = ocw.ETL_SOURCE - run = resource.runs.get(url=resource.url) - resource.readable_id = ( - f"{resource.readable_id}+{slugify(run.semester)}_{run.year}" - ) - resource.runs.exclude(pk=run.pk).delete() - resource.save() + run = resource.runs.filter(url=resource.url).first() + if run: + resource.readable_id = ( + f"{resource.readable_id}+{slugify(run.semester)}_{run.year}" + ) + resource.runs.exclude(pk=run.pk).delete() + resource.save() def revert_ocw_readable_id(apps, schema_editor): From e2a5d06349ba9d1ef83b066b14b909c5f757c79a Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Mon, 16 Oct 2023 11:16:59 -0400 Subject: [PATCH 11/11] Add logging just in case --- .../migrations/0020_refactor_ocw_readable_id.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/learning_resources/migrations/0020_refactor_ocw_readable_id.py b/learning_resources/migrations/0020_refactor_ocw_readable_id.py index 12f0eb1dca..4fd9ab8812 100644 --- a/learning_resources/migrations/0020_refactor_ocw_readable_id.py +++ b/learning_resources/migrations/0020_refactor_ocw_readable_id.py @@ -1,4 +1,5 @@ # Generated manually to convert the readable_id for OCW learning resources +import logging from django.db import migrations from django.utils.text import slugify @@ -6,6 +7,8 @@ from learning_resources.constants import PlatformType from learning_resources.etl import ocw +log = logging.getLogger() + def update_ocw_readable_id(apps, schema_editor): """ @@ -24,6 +27,8 @@ def update_ocw_readable_id(apps, schema_editor): ) resource.runs.exclude(pk=run.pk).delete() resource.save() + else: + log.error("No run found for %s", resource.url) def revert_ocw_readable_id(apps, schema_editor):