diff --git a/learning_resources/etl/loaders.py b/learning_resources/etl/loaders.py index 1a80590a13..9def0aafe3 100644 --- a/learning_resources/etl/loaders.py +++ b/learning_resources/etl/loaders.py @@ -301,7 +301,7 @@ def load_run( return learning_resource_run -def upsert_course_or_program( +def upsert_course_or_program( # noqa: C901 resource_data: dict, blocklist: list[str], duplicates: list[dict], @@ -388,9 +388,25 @@ def upsert_course_or_program( platform=platform, resource_type=LearningResourceType.course.name, ).order_by("-updated_on") - if existing_courses.count() > 1: + if existing_courses.count() > 0: for course in existing_courses[1:]: resource_delete_actions(course) + # does a resource with the same readable id already exist? + # if so, update it with the new unique field value + elif LearningResource.objects.filter( + readable_id=resource_data["readable_id"], + platform=platform, + resource_type=LearningResourceType.course.name, + ).exists(): + resource_data[unique_field_name] = unique_field_value + unique_field_name = READABLE_ID_FIELD + unique_field_value = resource_data.pop(READABLE_ID_FIELD) + return LearningResource.objects.select_for_update().update_or_create( + **{unique_field_name: unique_field_value}, + platform=platform, + resource_type=resource_type, + defaults=resource_data, + ) else: unique_field_value = resource_id return LearningResource.objects.select_for_update().update_or_create( diff --git a/learning_resources/etl/loaders_test.py b/learning_resources/etl/loaders_test.py index c9361a1852..70acf2e8e8 100644 --- a/learning_resources/etl/loaders_test.py +++ b/learning_resources/etl/loaders_test.py @@ -627,6 +627,46 @@ def test_load_course_unique_urls(unique_url): assert old_course == result +def test_load_course_old_id_new_url(): + """ + If url is supposed to be unique field, and a resource with the same readable_id + but different url exists, that resource should be updated with the new url. + """ + unique_url = "https://mit.edu/unique.html" + readable_id = "new_unique_course_id" + platform = LearningResourcePlatformFactory.create(code=PlatformType.ocw.name) + existing_course = LearningResourceFactory.create( + readable_id=readable_id, + url="https://mit.edu/old.html", + platform=platform, + is_course=True, + ) + props = { + "readable_id": readable_id, + "platform": PlatformType.ocw.name, + "offered_by": {"code": OfferedBy.ocw.name}, + "title": "New title", + "url": unique_url, + "description": "something", + "unique_field": "url", + "runs": [ + { + "run_id": "run_id", + "enrollment_start": "2024-01-01T00:00:00Z", + "start_date": "2024-01-20T00:00:00Z", + "end_date": "2024-06-20T00:00:00Z", + } + ], + } + result = load_course(props, [], []) + assert result.readable_id == readable_id + assert result.url == unique_url + assert result.published is True + existing_course.refresh_from_db() + assert existing_course.url == unique_url + assert existing_course.readable_id == readable_id + + @pytest.mark.parametrize("course_exists", [True, False]) def test_load_course_fetch_only(mocker, course_exists): """When fetch_only is True, course should just be fetched from db"""