diff --git a/course_catalog/etl/pipelines_test.py b/course_catalog/etl/pipelines_test.py index 655810b8da..47f27a0629 100644 --- a/course_catalog/etl/pipelines_test.py +++ b/course_catalog/etl/pipelines_test.py @@ -66,98 +66,6 @@ def test_micromasters_etl(): assert result == mock_load_programs.return_value -def test_xpro_programs_etl(): - """Verify that xpro programs etl pipeline executes correctly""" - with reload_mocked_pipeline( - patch("course_catalog.etl.xpro.extract_programs", autospec=True), - patch("course_catalog.etl.xpro.transform_programs", autospec=True), - patch("course_catalog.etl.loaders.load_programs", autospec=True), - ) as patches: - mock_extract, mock_transform, mock_load_programs = patches - result = pipelines.xpro_programs_etl() - - mock_extract.assert_called_once_with() - mock_transform.assert_called_once_with(mock_extract.return_value) - mock_load_programs.assert_called_once_with( - PlatformType.xpro.value, mock_transform.return_value - ) - - assert result == mock_load_programs.return_value - - -def test_xpro_courses_etl(): - """Verify that xpro courses etl pipeline executes correctly""" - with reload_mocked_pipeline( - patch("course_catalog.etl.xpro.extract_courses", autospec=True), - patch("course_catalog.etl.xpro.transform_courses", autospec=True), - patch("course_catalog.etl.loaders.load_courses", autospec=True), - ) as patches: - mock_extract, mock_transform, mock_load_courses = patches - result = pipelines.xpro_courses_etl() - - mock_extract.assert_called_once_with() - mock_transform.assert_called_once_with(mock_extract.return_value) - mock_load_courses.assert_called_once_with( - PlatformType.xpro.value, - mock_transform.return_value, - config=CourseLoaderConfig(prune=True), - ) - - assert result == mock_load_courses.return_value - - -def test_mitx_etl(): - """Verify that mitx etl pipeline executes correctly""" - with reload_mocked_pipeline( - patch("course_catalog.etl.mitx.extract", autospec=True), - patch("course_catalog.etl.mitx.transform", autospec=False), - patch("course_catalog.etl.loaders.load_courses", autospec=True), - ) as patches: - mock_extract, mock_transform, mock_load_courses = patches - result = pipelines.mitx_etl() - - mock_extract.assert_called_once_with() - - # each of these should be called with the return value of the extract - mock_transform.assert_called_once_with(mock_extract.return_value) - - # load_courses should be called *only* with the return value of transform - mock_load_courses.assert_called_once_with( - PlatformType.mitx.value, - mock_transform.return_value, - config=CourseLoaderConfig( - prune=True, - offered_by=OfferedByLoaderConfig(additive=True), - runs=LearningResourceRunLoaderConfig( - offered_by=OfferedByLoaderConfig(additive=True) - ), - ), - ) - - assert result == mock_load_courses.return_value - - -def test_oll_etl(): - """Verify that OLL etl pipeline executes correctly""" - with reload_mocked_pipeline( - patch("course_catalog.etl.oll.extract", autospec=True), - patch("course_catalog.etl.oll.transform", autospec=False), - patch("course_catalog.etl.loaders.load_courses", autospec=True), - ) as patches: - mock_extract, mock_transform, mock_load_courses = patches - result = pipelines.oll_etl() - - mock_extract.assert_called_once_with() - mock_transform.assert_called_once_with(mock_extract.return_value) - mock_load_courses.assert_called_once_with( - PlatformType.oll.value, - mock_transform.return_value, - config=CourseLoaderConfig(prune=True), - ) - - assert result == mock_load_courses.return_value - - def test_youtube_etl(): """Verify that youtube etl pipeline executes correctly""" with reload_mocked_pipeline( @@ -175,24 +83,6 @@ def test_youtube_etl(): assert result == mock_load_video_channels.return_value -def test_podcast_etl(): - """Verify that podcast etl pipeline executes correctly""" - - with reload_mocked_pipeline( - patch("course_catalog.etl.podcast.extract", autospec=True), - patch("course_catalog.etl.podcast.transform", autospec=True), - patch("course_catalog.etl.loaders.load_podcasts", autospec=True), - ) as patches: - mock_extract, mock_transform, mock_load_podcasts = patches - result = pipelines.podcast_etl() - - mock_extract.assert_called_once_with() - mock_transform.assert_called_once_with(mock_extract.return_value) - mock_load_podcasts.assert_called_once_with(mock_transform.return_value) - - assert result == mock_load_podcasts.return_value - - @pytest.mark.django_db() def test_prolearn_programs_etl(): """ diff --git a/learning_resources/admin.py b/learning_resources/admin.py index f6ec960154..896ada4231 100644 --- a/learning_resources/admin.py +++ b/learning_resources/admin.py @@ -104,7 +104,7 @@ class LearningResourceAdmin(admin.ModelAdmin): "published", ) list_filter = ("platform", "offered_by", "resource_type", "published") - inlines = [CourseInline, LearningPathInline] + inlines = [CourseInline, LearningPathInline, LearningResourceRunInline] autocomplete_fields = ("topics",) diff --git a/learning_resources/etl/ocw.py b/learning_resources/etl/ocw.py index f159bba930..e7abc34b52 100644 --- a/learning_resources/etl/ocw.py +++ b/learning_resources/etl/ocw.py @@ -10,6 +10,7 @@ import boto3 from botocore.exceptions import ClientError from django.conf import settings +from django.utils.text import slugify from requests import ReadTimeout from retry import retry @@ -37,6 +38,7 @@ OFFERED_BY = {"name": OfferedBy.ocw.value} PRIMARY_COURSE_ID = "primary_course_number" +ETL_SOURCE = "ocw" def transform_content_files( @@ -276,6 +278,7 @@ def transform_course(course_data: dict) -> dict: else: uid = uid.replace("-", "") course_data["run_id"] = uid + extra_course_numbers = course_data.get("extra_course_numbers", None) if extra_course_numbers: @@ -283,7 +286,7 @@ def transform_course(course_data: dict) -> dict: else: extra_course_numbers = [] - course_id = f"{course_data.get(PRIMARY_COURSE_ID)}" + readable_id = f"{course_data.get(PRIMARY_COURSE_ID)}+{slugify(course_data.get('term'))}_{course_data.get('year')}" # noqa: E501 topics = [ {"name": topic_name} for topic_name in list( @@ -297,7 +300,9 @@ def transform_course(course_data: dict) -> dict: image_src = course_data.get("image_src") return { - "readable_id": course_id, + "readable_id": readable_id, + "etl_source": ETL_SOURCE, + "offered_by": copy.deepcopy(OFFERED_BY), "platform": PlatformType.ocw.value, "title": course_data["course_title"], "departments": course_data.get("department_numbers", []), @@ -311,7 +316,6 @@ def transform_course(course_data: dict) -> dict: .get("image_metadata", {}) .get("image-alt"), }, - "offered_by": copy.deepcopy(OFFERED_BY), "description": course_data["course_description"], "url": course_data.get("url"), "last_modified": course_data.get("last_modified"), diff --git a/learning_resources/etl/ocw_test.py b/learning_resources/etl/ocw_test.py index 4d0c0174ea..cb915dc82e 100644 --- a/learning_resources/etl/ocw_test.py +++ b/learning_resources/etl/ocw_test.py @@ -10,6 +10,7 @@ from learning_resources.conftest import OCW_TEST_PREFIX, setup_s3_ocw from learning_resources.etl.ocw import ( + ETL_SOURCE, transform_content_files, transform_contentfile, transform_course, @@ -185,6 +186,8 @@ def test_transform_course(settings, legacy_uid, site_uid, expected_uid, has_extr } transformed_json = transform_course(extracted_json) if expected_uid: + assert transformed_json["readable_id"] == "16.01+fall_2005" + assert transformed_json["etl_source"] == ETL_SOURCE assert transformed_json["runs"][0]["run_id"] == expected_uid assert transformed_json["image"]["url"] == ( "http://test.edu/courses/16-01-unified-engineering-i-ii-iii-iv-fall-2005-spring-2006/8f56bbb35d0e456dc8b70911bec7cd0d_16-01f05.jpg" diff --git a/learning_resources/etl/pipelines_test.py b/learning_resources/etl/pipelines_test.py index 92df2c2c96..9cc3cd7ad9 100644 --- a/learning_resources/etl/pipelines_test.py +++ b/learning_resources/etl/pipelines_test.py @@ -14,72 +14,6 @@ from learning_resources.models import LearningResource -@pytest.fixture() -def ocw_valid_data(): - """ - Return valid ocw data - """ - return { - "course_title": "Unified Engineering I, II, III, \u0026 IV", - "course_description": "The basic objective of Unified Engineering is to give a solid understanding of the fundamental disciplines of aerospace engineering, as well as their interrelationships and applications. These disciplines are Materials and Structures (M); Computers and Programming (C); Fluid Mechanics (F); Thermodynamics (T); Propulsion (P); and Signals and Systems (S). In choosing to teach these subjects in a unified manner, the instructors seek to explain the common intellectual threads in these disciplines, as well as their combined application to solve engineering Systems Problems (SP). Throughout the year, the instructors emphasize the connections among the disciplines", - "site_uid": None, - "legacy_uid": "97db384e-f340-09a6-4df7-cb86cf701979", - "instructors": [ - { - "first_name": "Mark", - "last_name": "Drela", - "middle_initial": "", - "salutation": "Prof.", - "title": "Prof. Mark Drela", - }, - { - "first_name": "Steven", - "last_name": "Hall", - "middle_initial": "", - "salutation": "Prof.", - "title": "Prof. Steven Hall", - }, - ], - "department_numbers": ["16"], - "learning_resource_types": [ - "Lecture Videos", - "Course Introduction", - "Competition Videos", - "Problem Sets with Solutions", - "Exams with Solutions", - ], - "topics": [ - ["Engineering", "Aerospace Engineering", "Materials Selection"], - ["Engineering", "Aerospace Engineering", "Propulsion Systems"], - ["Science", "Physics", "Thermodynamics"], - ["Engineering", "Mechanical Engineering", "Fluid Mechanics"], - ["Engineering", "Aerospace Engineering"], - ["Business", "Project Management"], - ], - "primary_course_number": "16.01", - "extra_course_numbers": "16.02, 16.03, 16.04, 17.01", - "term": "Fall", - "year": "2005", - "level": ["Undergraduate"], - "image_src": "https://open-learning-course-data-production.s3.amazonaws.com/16-01-unified-engineering-i-ii-iii-iv-fall-2005-spring-2006/8f56bbb35d0e456dc8b70911bec7cd0d_16-01f05.jpg", - "course_image_metadata": { - "description": "An abstracted aircraft wing with illustrated systems. (Image by MIT OCW.)", - "draft": False, - "file": "https://open-learning-course-data-production.s3.amazonaws.com/16-01-unified-engineering-i-ii-iii-iv-fall-2005-spring-2006/8f56bbb35d0e456dc8b70911bec7cd0d_16-01f05.jpg", - "file_type": "image/jpeg", - "image_metadata": { - "caption": "An abstracted aircraft wing, illustrating the connections between the disciplines of Unified Engineering. (Image by MIT OpenCourseWare.)", - "credit": "", - "image-alt": "Illustration of an aircraft wing showing connections between the disciplines of the course.", - }, - "iscjklanguage": False, - "resourcetype": "Image", - "title": "16-01f05.jpg", - "uid": "8f56bbb3-5d0e-456d-c8b7-0911bec7cd0d", - }, - } - - @contextmanager def reload_mocked_pipeline(*patchers): """Create a context that is rolled back after executing the pipeline""" @@ -172,7 +106,7 @@ def test_ocw_courses_etl(settings, mocker): ) resource = LearningResource.objects.first() - assert resource.readable_id == "16.01" + assert resource.readable_id == "16.01+fall_2005" assert resource.course.extra_course_numbers == ["16.02", "16.03", "16.04"] assert resource.platform.platform == PlatformType.ocw.value assert resource.offered_by.name == OfferedBy.ocw.value diff --git a/learning_resources/migrations/0020_refactor_ocw_readable_id.py b/learning_resources/migrations/0020_refactor_ocw_readable_id.py new file mode 100644 index 0000000000..4fd9ab8812 --- /dev/null +++ b/learning_resources/migrations/0020_refactor_ocw_readable_id.py @@ -0,0 +1,53 @@ +# Generated manually to convert the readable_id for OCW learning resources +import logging + +from django.db import migrations +from django.utils.text import slugify + +from learning_resources.constants import PlatformType +from learning_resources.etl import ocw + +log = logging.getLogger() + + +def update_ocw_readable_id(apps, schema_editor): + """ + Update readable_id and course.extra_course_numbers for existing + OCW learning resources + """ + LearningResource = apps.get_model("learning_resources", "LearningResource") + for resource in LearningResource.objects.filter( + platform__platform=PlatformType.ocw.value + ).prefetch_related("runs"): + resource.etl_source = ocw.ETL_SOURCE + run = resource.runs.filter(url=resource.url).first() + if run: + resource.readable_id = ( + f"{resource.readable_id}+{slugify(run.semester)}_{run.year}" + ) + resource.runs.exclude(pk=run.pk).delete() + resource.save() + else: + log.error("No run found for %s", resource.url) + + +def revert_ocw_readable_id(apps, schema_editor): + """ + Revert readable_id and course.extra_course_numbers for existing + OCW learning resources + """ + LearningResource = apps.get_model("learning_resources", "LearningResource") + for resource in LearningResource.objects.filter( + platform__platform=PlatformType.ocw.value + ).select_related("course"): + resource.readable_id = resource.readable_id.split("+")[0] + resource.save() + + +class Migration(migrations.Migration): + dependencies = [ + ("learning_resources", "0019_departments"), + ] + operations = [ + migrations.RunPython(update_ocw_readable_id, revert_ocw_readable_id), + ] diff --git a/learning_resources/tasks_test.py b/learning_resources/tasks_test.py index 08c0e0fd2a..764ad39101 100644 --- a/learning_resources/tasks_test.py +++ b/learning_resources/tasks_test.py @@ -199,7 +199,7 @@ def test_get_ocw_courses(settings, mocker, mocked_celery, timestamp, overwrite): course_resource = models.Course.objects.first().learning_resource assert course_resource.title == "Unified Engineering I, II, III, & IV" - assert course_resource.readable_id == "16.01" + assert course_resource.readable_id == "16.01+fall_2005" assert course_resource.runs.count() == 1 assert course_resource.runs.first().run_id == "97db384ef34009a64df7cb86cf701979" assert (