mitodl · mbertrand · Oct 16, 2023 · Oct 12, 2023 · Oct 12, 2023 · Oct 12, 2023
diff --git a/course_catalog/etl/pipelines_test.py b/course_catalog/etl/pipelines_test.py
@@ -66,98 +66,6 @@ def test_micromasters_etl():
     assert result == mock_load_programs.return_value
 
 
-def test_xpro_programs_etl():
-    """Verify that xpro programs etl pipeline executes correctly"""
-    with reload_mocked_pipeline(
-        patch("course_catalog.etl.xpro.extract_programs", autospec=True),
-        patch("course_catalog.etl.xpro.transform_programs", autospec=True),
-        patch("course_catalog.etl.loaders.load_programs", autospec=True),
-    ) as patches:
-        mock_extract, mock_transform, mock_load_programs = patches
-        result = pipelines.xpro_programs_etl()
-
-    mock_extract.assert_called_once_with()
-    mock_transform.assert_called_once_with(mock_extract.return_value)
-    mock_load_programs.assert_called_once_with(
-        PlatformType.xpro.value, mock_transform.return_value
-    )
-
-    assert result == mock_load_programs.return_value
-
-
-def test_xpro_courses_etl():
-    """Verify that xpro courses etl pipeline executes correctly"""
-    with reload_mocked_pipeline(
-        patch("course_catalog.etl.xpro.extract_courses", autospec=True),
-        patch("course_catalog.etl.xpro.transform_courses", autospec=True),
-        patch("course_catalog.etl.loaders.load_courses", autospec=True),
-    ) as patches:
-        mock_extract, mock_transform, mock_load_courses = patches
-        result = pipelines.xpro_courses_etl()
-
-    mock_extract.assert_called_once_with()
-    mock_transform.assert_called_once_with(mock_extract.return_value)
-    mock_load_courses.assert_called_once_with(
-        PlatformType.xpro.value,
-        mock_transform.return_value,
-        config=CourseLoaderConfig(prune=True),
-    )
-
-    assert result == mock_load_courses.return_value
-
-
-def test_mitx_etl():
-    """Verify that mitx etl pipeline executes correctly"""
-    with reload_mocked_pipeline(
-        patch("course_catalog.etl.mitx.extract", autospec=True),
-        patch("course_catalog.etl.mitx.transform", autospec=False),
-        patch("course_catalog.etl.loaders.load_courses", autospec=True),
-    ) as patches:
-        mock_extract, mock_transform, mock_load_courses = patches
-        result = pipelines.mitx_etl()
-
-    mock_extract.assert_called_once_with()
-
-    # each of these should be called with the return value of the extract
-    mock_transform.assert_called_once_with(mock_extract.return_value)
-
-    # load_courses should be called *only* with the return value of transform
-    mock_load_courses.assert_called_once_with(
-        PlatformType.mitx.value,
-        mock_transform.return_value,
-        config=CourseLoaderConfig(
-            prune=True,
-            offered_by=OfferedByLoaderConfig(additive=True),
-            runs=LearningResourceRunLoaderConfig(
-                offered_by=OfferedByLoaderConfig(additive=True)
-            ),
-        ),
-    )
-
-    assert result == mock_load_courses.return_value
-
-
-def test_oll_etl():
-    """Verify that OLL etl pipeline executes correctly"""
-    with reload_mocked_pipeline(
-        patch("course_catalog.etl.oll.extract", autospec=True),
-        patch("course_catalog.etl.oll.transform", autospec=False),
-        patch("course_catalog.etl.loaders.load_courses", autospec=True),
-    ) as patches:
-        mock_extract, mock_transform, mock_load_courses = patches
-        result = pipelines.oll_etl()
-
-    mock_extract.assert_called_once_with()
-    mock_transform.assert_called_once_with(mock_extract.return_value)
-    mock_load_courses.assert_called_once_with(
-        PlatformType.oll.value,
-        mock_transform.return_value,
-        config=CourseLoaderConfig(prune=True),
-    )
-
-    assert result == mock_load_courses.return_value
-
-
 def test_youtube_etl():
     """Verify that youtube etl pipeline executes correctly"""
     with reload_mocked_pipeline(
@@ -175,24 +83,6 @@ def test_youtube_etl():
     assert result == mock_load_video_channels.return_value
 
 
-def test_podcast_etl():
-    """Verify that podcast etl pipeline executes correctly"""
-
-    with reload_mocked_pipeline(
-        patch("course_catalog.etl.podcast.extract", autospec=True),
-        patch("course_catalog.etl.podcast.transform", autospec=True),
-        patch("course_catalog.etl.loaders.load_podcasts", autospec=True),
-    ) as patches:
-        mock_extract, mock_transform, mock_load_podcasts = patches
-        result = pipelines.podcast_etl()
-
-    mock_extract.assert_called_once_with()
-    mock_transform.assert_called_once_with(mock_extract.return_value)
-    mock_load_podcasts.assert_called_once_with(mock_transform.return_value)
-
-    assert result == mock_load_podcasts.return_value
-
-
 @pytest.mark.django_db()
 def test_prolearn_programs_etl():
     """

diff --git a/learning_resources/admin.py b/learning_resources/admin.py
@@ -104,7 +104,7 @@ class LearningResourceAdmin(admin.ModelAdmin):
         "published",
     )
     list_filter = ("platform", "offered_by", "resource_type", "published")
-    inlines = [CourseInline, LearningPathInline]
+    inlines = [CourseInline, LearningPathInline, LearningResourceRunInline]
     autocomplete_fields = ("topics",)
 
 

diff --git a/learning_resources/etl/ocw.py b/learning_resources/etl/ocw.py
@@ -10,6 +10,7 @@
 import boto3
 from botocore.exceptions import ClientError
 from django.conf import settings
+from django.utils.text import slugify
 from requests import ReadTimeout
 from retry import retry
 
@@ -37,6 +38,7 @@
 
 OFFERED_BY = {"name": OfferedBy.ocw.value}
 PRIMARY_COURSE_ID = "primary_course_number"
+ETL_SOURCE = "ocw"
 
 
 def transform_content_files(
@@ -276,14 +278,15 @@ def transform_course(course_data: dict) -> dict:
     else:
         uid = uid.replace("-", "")
     course_data["run_id"] = uid
+
     extra_course_numbers = course_data.get("extra_course_numbers", None)
 
     if extra_course_numbers:
         extra_course_numbers = [num.strip() for num in extra_course_numbers.split(",")]
     else:
         extra_course_numbers = []
 
-    course_id = f"{course_data.get(PRIMARY_COURSE_ID)}"
+    readable_id = f"{course_data.get(PRIMARY_COURSE_ID)}+{slugify(course_data.get('term'))}_{course_data.get('year')}"  # noqa: E501
     topics = [
         {"name": topic_name}
         for topic_name in list(
@@ -297,7 +300,9 @@ def transform_course(course_data: dict) -> dict:
     image_src = course_data.get("image_src")
 
     return {
-        "readable_id": course_id,
+        "readable_id": readable_id,
+        "etl_source": ETL_SOURCE,
+        "offered_by": copy.deepcopy(OFFERED_BY),
         "platform": PlatformType.ocw.value,
         "title": course_data["course_title"],
         "departments": course_data.get("department_numbers", []),
@@ -311,7 +316,6 @@ def transform_course(course_data: dict) -> dict:
             .get("image_metadata", {})
             .get("image-alt"),
         },
-        "offered_by": copy.deepcopy(OFFERED_BY),
         "description": course_data["course_description"],
         "url": course_data.get("url"),
         "last_modified": course_data.get("last_modified"),

diff --git a/learning_resources/etl/ocw_test.py b/learning_resources/etl/ocw_test.py
@@ -10,6 +10,7 @@
 
 from learning_resources.conftest import OCW_TEST_PREFIX, setup_s3_ocw
 from learning_resources.etl.ocw import (
+    ETL_SOURCE,
     transform_content_files,
     transform_contentfile,
     transform_course,
@@ -185,6 +186,8 @@ def test_transform_course(settings, legacy_uid, site_uid, expected_uid, has_extr
     }
     transformed_json = transform_course(extracted_json)
     if expected_uid:
+        assert transformed_json["readable_id"] == "16.01+fall_2005"
+        assert transformed_json["etl_source"] == ETL_SOURCE
         assert transformed_json["runs"][0]["run_id"] == expected_uid
         assert transformed_json["image"]["url"] == (
             "http://test.edu/courses/16-01-unified-engineering-i-ii-iii-iv-fall-2005-spring-2006/8f56bbb35d0e456dc8b70911bec7cd0d_16-01f05.jpg"

diff --git a/learning_resources/etl/pipelines_test.py b/learning_resources/etl/pipelines_test.py
@@ -14,72 +14,6 @@
 from learning_resources.models import LearningResource
 
 
-@pytest.fixture()
-def ocw_valid_data():
-    """
-    Return valid ocw data
-    """
-    return {
-        "course_title": "Unified Engineering I, II, III, \u0026 IV",
-        "course_description": "The basic objective of Unified Engineering is to give a solid understanding of the fundamental disciplines of aerospace engineering, as well as their interrelationships and applications. These disciplines are Materials and Structures (M); Computers and Programming (C); Fluid Mechanics (F); Thermodynamics (T); Propulsion (P); and Signals and Systems (S). In choosing to teach these subjects in a unified manner, the instructors seek to explain the common intellectual threads in these disciplines, as well as their combined application to solve engineering Systems Problems (SP). Throughout the year, the instructors emphasize the connections among the disciplines",
-        "site_uid": None,
-        "legacy_uid": "97db384e-f340-09a6-4df7-cb86cf701979",
-        "instructors": [
-            {
-                "first_name": "Mark",
-                "last_name": "Drela",
-                "middle_initial": "",
-                "salutation": "Prof.",
-                "title": "Prof. Mark Drela",
-            },
-            {
-                "first_name": "Steven",
-                "last_name": "Hall",
-                "middle_initial": "",
-                "salutation": "Prof.",
-                "title": "Prof. Steven Hall",
-            },
-        ],
-        "department_numbers": ["16"],
-        "learning_resource_types": [
-            "Lecture Videos",
-            "Course Introduction",
-            "Competition Videos",
-            "Problem Sets with Solutions",
-            "Exams with Solutions",
-        ],
-        "topics": [
-            ["Engineering", "Aerospace Engineering", "Materials Selection"],
-            ["Engineering", "Aerospace Engineering", "Propulsion Systems"],
-            ["Science", "Physics", "Thermodynamics"],
-            ["Engineering", "Mechanical Engineering", "Fluid Mechanics"],
-            ["Engineering", "Aerospace Engineering"],
-            ["Business", "Project Management"],
-        ],
-        "primary_course_number": "16.01",
-        "extra_course_numbers": "16.02, 16.03, 16.04, 17.01",
-        "term": "Fall",
-        "year": "2005",
-        "level": ["Undergraduate"],
-        "image_src": "https://open-learning-course-data-production.s3.amazonaws.com/16-01-unified-engineering-i-ii-iii-iv-fall-2005-spring-2006/8f56bbb35d0e456dc8b70911bec7cd0d_16-01f05.jpg",
-        "course_image_metadata": {
-            "description": "An abstracted aircraft wing with illustrated systems. (Image by MIT OCW.)",
-            "draft": False,
-            "file": "https://open-learning-course-data-production.s3.amazonaws.com/16-01-unified-engineering-i-ii-iii-iv-fall-2005-spring-2006/8f56bbb35d0e456dc8b70911bec7cd0d_16-01f05.jpg",
-            "file_type": "image/jpeg",
-            "image_metadata": {
-                "caption": "An abstracted aircraft wing, illustrating the connections between the disciplines of Unified Engineering. (Image by MIT OpenCourseWare.)",
-                "credit": "",
-                "image-alt": "Illustration of an aircraft wing showing connections between the disciplines of the course.",
-            },
-            "iscjklanguage": False,
-            "resourcetype": "Image",
-            "title": "16-01f05.jpg",
-            "uid": "8f56bbb3-5d0e-456d-c8b7-0911bec7cd0d",
-        },
-    }
-
-
 @contextmanager
 def reload_mocked_pipeline(*patchers):
     """Create a context that is rolled back after executing the pipeline"""
@@ -172,7 +106,7 @@ def test_ocw_courses_etl(settings, mocker):
     )
 
     resource = LearningResource.objects.first()
-    assert resource.readable_id == "16.01"
+    assert resource.readable_id == "16.01+fall_2005"
     assert resource.course.extra_course_numbers == ["16.02", "16.03", "16.04"]
     assert resource.platform.platform == PlatformType.ocw.value
     assert resource.offered_by.name == OfferedBy.ocw.value

diff --git a/learning_resources/migrations/0020_refactor_ocw_readable_id.py b/learning_resources/migrations/0020_refactor_ocw_readable_id.py
@@ -0,0 +1,53 @@
+# Generated manually to convert the readable_id for OCW learning resources
+import logging
+
+from django.db import migrations
+from django.utils.text import slugify
+
+from learning_resources.constants import PlatformType
+from learning_resources.etl import ocw
+
+log = logging.getLogger(__name__)
+
+
+def update_ocw_readable_id(apps, schema_editor):
+    """
+    Set etl_source and append "+<semester-slug>_<year>" to the readable_id of
+    each OCW resource, keeping only the run whose url matches the resource url
+    """
+    LearningResource = apps.get_model("learning_resources", "LearningResource")
+    for resource in LearningResource.objects.filter(
+        platform__platform=PlatformType.ocw.value
+    ):
+        run = resource.runs.filter(url=resource.url).first()
+        if run is None:
+            # nothing is saved for this resource, so it keeps its old readable_id
+            log.error("No run found for %s", resource.url)
+            continue
+        resource.etl_source = ocw.ETL_SOURCE
+        suffix = f"{slugify(run.semester)}_{run.year}"
+        resource.readable_id = f"{resource.readable_id}+{suffix}"
+        resource.runs.exclude(pk=run.pk).delete()
+        resource.save()
+
+
+def revert_ocw_readable_id(apps, schema_editor):
+    """
+    Truncate each OCW readable_id at its first "+"; runs deleted by the forward
+    migration and the previous etl_source values are not restored
+    """
+    LearningResource = apps.get_model("learning_resources", "LearningResource")
+    for resource in LearningResource.objects.filter(
+        platform__platform=PlatformType.ocw.value
+    ):
+        resource.readable_id = resource.readable_id.split("+")[0]
+        resource.save()
+
+
+class Migration(migrations.Migration):
+    """Reversible data migration that rewrites OCW readable_ids"""
+
+    dependencies = [("learning_resources", "0019_departments")]
+    operations = [
+        migrations.RunPython(update_ocw_readable_id, revert_ocw_readable_id),
+    ]
diff --git a/learning_resources/tasks_test.py b/learning_resources/tasks_test.py
@@ -199,7 +199,7 @@ def test_get_ocw_courses(settings, mocker, mocked_celery, timestamp, overwrite):
 
     course_resource = models.Course.objects.first().learning_resource
     assert course_resource.title == "Unified Engineering I, II, III, & IV"
-    assert course_resource.readable_id == "16.01"
+    assert course_resource.readable_id == "16.01+fall_2005"
     assert course_resource.runs.count() == 1
     assert course_resource.runs.first().run_id == "97db384ef34009a64df7cb86cf701979"
     assert (