Skip to content
110 changes: 0 additions & 110 deletions course_catalog/etl/pipelines_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,98 +66,6 @@ def test_micromasters_etl():
assert result == mock_load_programs.return_value


def test_xpro_programs_etl():
    """The xpro programs pipeline should chain extract, transform and load_programs"""
    pipeline_patchers = (
        patch("course_catalog.etl.xpro.extract_programs", autospec=True),
        patch("course_catalog.etl.xpro.transform_programs", autospec=True),
        patch("course_catalog.etl.loaders.load_programs", autospec=True),
    )
    with reload_mocked_pipeline(*pipeline_patchers) as (
        extract_mock,
        transform_mock,
        load_mock,
    ):
        outcome = pipelines.xpro_programs_etl()

    # extract takes no arguments and feeds its output to transform
    extract_mock.assert_called_once_with()
    extracted = extract_mock.return_value
    transform_mock.assert_called_once_with(extracted)

    # the loader gets the xpro platform plus the transformed programs
    transformed = transform_mock.return_value
    load_mock.assert_called_once_with(PlatformType.xpro.value, transformed)

    # the pipeline hands back whatever the loader returned
    assert outcome == load_mock.return_value


def test_xpro_courses_etl():
    """The xpro courses pipeline should extract, transform, then load with pruning"""
    with reload_mocked_pipeline(
        patch("course_catalog.etl.xpro.extract_courses", autospec=True),
        patch("course_catalog.etl.xpro.transform_courses", autospec=True),
        patch("course_catalog.etl.loaders.load_courses", autospec=True),
    ) as mocks:
        extract_mock, transform_mock, loader_mock = mocks
        etl_result = pipelines.xpro_courses_etl()

    extract_mock.assert_called_once_with()
    transform_mock.assert_called_once_with(extract_mock.return_value)

    # courses are loaded for the xpro platform with pruning enabled
    expected_config = CourseLoaderConfig(prune=True)
    loader_mock.assert_called_once_with(
        PlatformType.xpro.value, transform_mock.return_value, config=expected_config
    )

    assert etl_result == loader_mock.return_value


def test_mitx_etl():
    """The mitx pipeline should extract, transform, then load with additive offerors"""
    patchers = [
        patch("course_catalog.etl.mitx.extract", autospec=True),
        patch("course_catalog.etl.mitx.transform", autospec=False),
        patch("course_catalog.etl.loaders.load_courses", autospec=True),
    ]
    with reload_mocked_pipeline(*patchers) as (
        extract_mock,
        transform_mock,
        loader_mock,
    ):
        etl_result = pipelines.mitx_etl()

    extract_mock.assert_called_once_with()

    # transform must receive exactly what extract produced
    transform_mock.assert_called_once_with(extract_mock.return_value)

    # the loader must receive only the transform output, along with a config
    # that prunes and treats offered_by as additive for courses and their runs
    expected_config = CourseLoaderConfig(
        prune=True,
        offered_by=OfferedByLoaderConfig(additive=True),
        runs=LearningResourceRunLoaderConfig(
            offered_by=OfferedByLoaderConfig(additive=True)
        ),
    )
    loader_mock.assert_called_once_with(
        PlatformType.mitx.value,
        transform_mock.return_value,
        config=expected_config,
    )

    assert etl_result == loader_mock.return_value


def test_oll_etl():
    """The OLL pipeline should extract, transform, then load courses with pruning"""
    with reload_mocked_pipeline(
        patch("course_catalog.etl.oll.extract", autospec=True),
        patch("course_catalog.etl.oll.transform", autospec=False),
        patch("course_catalog.etl.loaders.load_courses", autospec=True),
    ) as (oll_extract, oll_transform, course_loader):
        pipeline_output = pipelines.oll_etl()

    # each stage is invoked exactly once, fed by the previous stage's output
    oll_extract.assert_called_once_with()
    oll_transform.assert_called_once_with(oll_extract.return_value)

    loader_kwargs = {"config": CourseLoaderConfig(prune=True)}
    course_loader.assert_called_once_with(
        PlatformType.oll.value, oll_transform.return_value, **loader_kwargs
    )

    assert pipeline_output == course_loader.return_value


def test_youtube_etl():
"""Verify that youtube etl pipeline executes correctly"""
with reload_mocked_pipeline(
Expand All @@ -175,24 +83,6 @@ def test_youtube_etl():
assert result == mock_load_video_channels.return_value


def test_podcast_etl():
    """The podcast pipeline should chain extract, transform and load_podcasts"""
    stage_targets = (
        "course_catalog.etl.podcast.extract",
        "course_catalog.etl.podcast.transform",
        "course_catalog.etl.loaders.load_podcasts",
    )
    stage_patchers = [patch(target, autospec=True) for target in stage_targets]

    with reload_mocked_pipeline(*stage_patchers) as stage_mocks:
        extract_mock, transform_mock, podcast_loader = stage_mocks
        etl_result = pipelines.podcast_etl()

    extract_mock.assert_called_once_with()
    transform_mock.assert_called_once_with(extract_mock.return_value)
    # unlike the course loaders, load_podcasts only receives the transformed data
    podcast_loader.assert_called_once_with(transform_mock.return_value)

    assert etl_result == podcast_loader.return_value


@pytest.mark.django_db()
def test_prolearn_programs_etl():
"""
Expand Down
2 changes: 1 addition & 1 deletion learning_resources/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ class LearningResourceAdmin(admin.ModelAdmin):
"published",
)
list_filter = ("platform", "offered_by", "resource_type", "published")
inlines = [CourseInline, LearningPathInline]
inlines = [CourseInline, LearningPathInline, LearningResourceRunInline]
autocomplete_fields = ("topics",)


Expand Down
10 changes: 7 additions & 3 deletions learning_resources/etl/ocw.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import boto3
from botocore.exceptions import ClientError
from django.conf import settings
from django.utils.text import slugify
from requests import ReadTimeout
from retry import retry

Expand Down Expand Up @@ -37,6 +38,7 @@

# Offered-by payload; transform_course deep-copies it into each course's "offered_by"
OFFERED_BY = {"name": OfferedBy.ocw.value}
# Key of the extracted course data that holds the primary course number
PRIMARY_COURSE_ID = "primary_course_number"
# Value transform_course emits as "etl_source" for OCW courses
ETL_SOURCE = "ocw"


def transform_content_files(
Expand Down Expand Up @@ -276,14 +278,15 @@ def transform_course(course_data: dict) -> dict:
else:
uid = uid.replace("-", "")
course_data["run_id"] = uid

extra_course_numbers = course_data.get("extra_course_numbers", None)

if extra_course_numbers:
extra_course_numbers = [num.strip() for num in extra_course_numbers.split(",")]
else:
extra_course_numbers = []

course_id = f"{course_data.get(PRIMARY_COURSE_ID)}"
readable_id = f"{course_data.get(PRIMARY_COURSE_ID)}+{slugify(course_data.get('term'))}_{course_data.get('year')}" # noqa: E501
topics = [
{"name": topic_name}
for topic_name in list(
Expand All @@ -297,7 +300,9 @@ def transform_course(course_data: dict) -> dict:
image_src = course_data.get("image_src")

return {
"readable_id": course_id,
"readable_id": readable_id,
"etl_source": ETL_SOURCE,
"offered_by": copy.deepcopy(OFFERED_BY),
"platform": PlatformType.ocw.value,
"title": course_data["course_title"],
"departments": course_data.get("department_numbers", []),
Expand All @@ -311,7 +316,6 @@ def transform_course(course_data: dict) -> dict:
.get("image_metadata", {})
.get("image-alt"),
},
"offered_by": copy.deepcopy(OFFERED_BY),
"description": course_data["course_description"],
"url": course_data.get("url"),
"last_modified": course_data.get("last_modified"),
Expand Down
3 changes: 3 additions & 0 deletions learning_resources/etl/ocw_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from learning_resources.conftest import OCW_TEST_PREFIX, setup_s3_ocw
from learning_resources.etl.ocw import (
ETL_SOURCE,
transform_content_files,
transform_contentfile,
transform_course,
Expand Down Expand Up @@ -185,6 +186,8 @@ def test_transform_course(settings, legacy_uid, site_uid, expected_uid, has_extr
}
transformed_json = transform_course(extracted_json)
if expected_uid:
assert transformed_json["readable_id"] == "16.01+fall_2005"
assert transformed_json["etl_source"] == ETL_SOURCE
assert transformed_json["runs"][0]["run_id"] == expected_uid
assert transformed_json["image"]["url"] == (
"http://test.edu/courses/16-01-unified-engineering-i-ii-iii-iv-fall-2005-spring-2006/8f56bbb35d0e456dc8b70911bec7cd0d_16-01f05.jpg"
Expand Down
68 changes: 1 addition & 67 deletions learning_resources/etl/pipelines_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,72 +14,6 @@
from learning_resources.models import LearningResource


@pytest.fixture()
def ocw_valid_data():
    """
    Build a sample of valid OCW course data (course 16.01, Fall 2005)
    """
    # the course image url appears both as image_src and as the metadata file
    image_url = "https://open-learning-course-data-production.s3.amazonaws.com/16-01-unified-engineering-i-ii-iii-iv-fall-2005-spring-2006/8f56bbb35d0e456dc8b70911bec7cd0d_16-01f05.jpg"

    instructors = [
        {
            "first_name": "Mark",
            "last_name": "Drela",
            "middle_initial": "",
            "salutation": "Prof.",
            "title": "Prof. Mark Drela",
        },
        {
            "first_name": "Steven",
            "last_name": "Hall",
            "middle_initial": "",
            "salutation": "Prof.",
            "title": "Prof. Steven Hall",
        },
    ]

    resource_types = [
        "Lecture Videos",
        "Course Introduction",
        "Competition Videos",
        "Problem Sets with Solutions",
        "Exams with Solutions",
    ]

    topics = [
        ["Engineering", "Aerospace Engineering", "Materials Selection"],
        ["Engineering", "Aerospace Engineering", "Propulsion Systems"],
        ["Science", "Physics", "Thermodynamics"],
        ["Engineering", "Mechanical Engineering", "Fluid Mechanics"],
        ["Engineering", "Aerospace Engineering"],
        ["Business", "Project Management"],
    ]

    image_details = {
        "caption": "An abstracted aircraft wing, illustrating the connections between the disciplines of Unified Engineering. (Image by MIT OpenCourseWare.)",
        "credit": "",
        "image-alt": "Illustration of an aircraft wing showing connections between the disciplines of the course.",
    }

    course_image_metadata = {
        "description": "An abstracted aircraft wing with illustrated systems. (Image by MIT OCW.)",
        "draft": False,
        "file": image_url,
        "file_type": "image/jpeg",
        "image_metadata": image_details,
        "iscjklanguage": False,
        "resourcetype": "Image",
        "title": "16-01f05.jpg",
        "uid": "8f56bbb3-5d0e-456d-c8b7-0911bec7cd0d",
    }

    return {
        "course_title": "Unified Engineering I, II, III, \u0026 IV",
        "course_description": "The basic objective of Unified Engineering is to give a solid understanding of the fundamental disciplines of aerospace engineering, as well as their interrelationships and applications. These disciplines are Materials and Structures (M); Computers and Programming (C); Fluid Mechanics (F); Thermodynamics (T); Propulsion (P); and Signals and Systems (S). In choosing to teach these subjects in a unified manner, the instructors seek to explain the common intellectual threads in these disciplines, as well as their combined application to solve engineering Systems Problems (SP). Throughout the year, the instructors emphasize the connections among the disciplines",
        "site_uid": None,
        "legacy_uid": "97db384e-f340-09a6-4df7-cb86cf701979",
        "instructors": instructors,
        "department_numbers": ["16"],
        "learning_resource_types": resource_types,
        "topics": topics,
        "primary_course_number": "16.01",
        "extra_course_numbers": "16.02, 16.03, 16.04, 17.01",
        "term": "Fall",
        "year": "2005",
        "level": ["Undergraduate"],
        "image_src": image_url,
        "course_image_metadata": course_image_metadata,
    }


@contextmanager
def reload_mocked_pipeline(*patchers):
"""Create a context that is rolled back after executing the pipeline"""
Expand Down Expand Up @@ -172,7 +106,7 @@ def test_ocw_courses_etl(settings, mocker):
)

resource = LearningResource.objects.first()
assert resource.readable_id == "16.01"
assert resource.readable_id == "16.01+fall_2005"
assert resource.course.extra_course_numbers == ["16.02", "16.03", "16.04"]
assert resource.platform.platform == PlatformType.ocw.value
assert resource.offered_by.name == OfferedBy.ocw.value
Expand Down
53 changes: 53 additions & 0 deletions learning_resources/migrations/0020_refactor_ocw_readable_id.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Generated manually to convert the readable_id for OCW learning resources
import logging

from django.db import migrations
from django.utils.text import slugify

from learning_resources.constants import PlatformType
from learning_resources.etl import ocw

# Module-scoped logger (not the root logger) so records carry this module's name
log = logging.getLogger(__name__)


def update_ocw_readable_id(apps, schema_editor):
    """
    Append the run's term and year to the readable_id of existing OCW resources

    For every OCW learning resource, the run whose url equals the resource url
    is looked up. When found, the resource's readable_id becomes
    "<readable_id>+<slugified semester>_<year>", its etl_source is set to the
    OCW ETL source, and all other runs of the resource are deleted. Resources
    without a matching run are logged and left unchanged.

    Args:
        apps: app registry passed in by RunPython, used to look up the model
        schema_editor: unused; required by the RunPython callable signature
    """
    LearningResource = apps.get_model("learning_resources", "LearningResource")
    # No prefetch_related("runs"): the filter()/exclude() calls below always
    # issue fresh queries, so a prefetched cache would never be read.
    ocw_resources = LearningResource.objects.filter(
        platform__platform=PlatformType.ocw.value
    )
    for resource in ocw_resources:
        run = resource.runs.filter(url=resource.url).first()
        if run is None:
            # NOTE(review): etl_source is not persisted for these resources
            # either (same as before) - confirm that is intended.
            log.error("No run found for %s", resource.url)
            continue

        suffix = f"+{slugify(run.semester)}_{run.year}"
        # Skip ids already in the new format (e.g. created by the updated ETL
        # before this migration ran) so the suffix is never appended twice.
        if not resource.readable_id.endswith(suffix):
            resource.readable_id = f"{resource.readable_id}{suffix}"
        resource.etl_source = ocw.ETL_SOURCE
        resource.runs.exclude(pk=run.pk).delete()
        resource.save()


def revert_ocw_readable_id(apps, schema_editor):
    """
    Strip the "+<term>_<year>" suffix from the readable_id of OCW resources

    Only readable_id is reverted: everything from the first "+" onwards is
    dropped. This function does not touch etl_source and does not recreate
    any runs.

    Args:
        apps: app registry passed in by RunPython, used to look up the model
        schema_editor: unused; required by the RunPython callable signature
    """
    LearningResource = apps.get_model("learning_resources", "LearningResource")
    # No select_related("course"): the course relation is never read here.
    ocw_resources = LearningResource.objects.filter(
        platform__platform=PlatformType.ocw.value
    )
    for resource in ocw_resources:
        resource.readable_id = resource.readable_id.partition("+")[0]
        resource.save()


class Migration(migrations.Migration):
    """Data migration that rewrites the readable_id of OCW learning resources"""

    dependencies = [
        ("learning_resources", "0019_departments"),
    ]
    operations = [
        migrations.RunPython(
            code=update_ocw_readable_id,
            reverse_code=revert_ocw_readable_id,
        ),
    ]
2 changes: 1 addition & 1 deletion learning_resources/tasks_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def test_get_ocw_courses(settings, mocker, mocked_celery, timestamp, overwrite):

course_resource = models.Course.objects.first().learning_resource
assert course_resource.title == "Unified Engineering I, II, III, & IV"
assert course_resource.readable_id == "16.01"
assert course_resource.readable_id == "16.01+fall_2005"
assert course_resource.runs.count() == 1
assert course_resource.runs.first().run_id == "97db384ef34009a64df7cb86cf701979"
assert (
Expand Down