From 865de9ae2d8a50a5e932eaf07d136d6c92f0bebe Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Tue, 29 Oct 2024 11:25:12 -0400 Subject: [PATCH 1/2] Fix prolearn bug for existing resources whose urls have changed --- learning_resources/etl/loaders.py | 26 +++++++++++++++-- learning_resources/etl/loaders_test.py | 40 ++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/learning_resources/etl/loaders.py b/learning_resources/etl/loaders.py index 1a80590a13..9455332de6 100644 --- a/learning_resources/etl/loaders.py +++ b/learning_resources/etl/loaders.py @@ -301,7 +301,7 @@ def load_run( return learning_resource_run -def upsert_course_or_program( +def upsert_course_or_program( # noqa: C901 resource_data: dict, blocklist: list[str], duplicates: list[dict], @@ -388,9 +388,31 @@ def upsert_course_or_program( platform=platform, resource_type=LearningResourceType.course.name, ).order_by("-updated_on") - if existing_courses.count() > 1: + if existing_courses.count() > 0: for course in existing_courses[1:]: resource_delete_actions(course) + else: + # does a resource with the same readable id already exist? + # if so, update it with the new unique field value + existing_id = LearningResource.objects.filter( + readable_id=resource_data["readable_id"], + platform=platform, + resource_type=LearningResourceType.course.name, + ).exists() + if existing_id: + resource_data[unique_field_name] = unique_field_value + return LearningResource.objects.select_for_update().update_or_create( + readable_id=resource_data.pop("readable_id"), + platform=platform, + resource_type=resource_type, + defaults=resource_data, + ) + return LearningResource.objects.select_for_update().update_or_create( + **{unique_field_name: unique_field_value}, + platform=platform, + resource_type=resource_type, + defaults=resource_data, + ) else: unique_field_value = resource_id return LearningResource.objects.select_for_update().update_or_create( diff --git a/learning_resources/etl/loaders_test.py b/learning_resources/etl/loaders_test.py index c9361a1852..70acf2e8e8 100644 --- a/learning_resources/etl/loaders_test.py +++ b/learning_resources/etl/loaders_test.py @@ -627,6 +627,46 @@ def test_load_course_unique_urls(unique_url): assert old_course == result +def test_load_course_old_id_new_url(): + """ + If url is supposed to be unique field, and a resource with the same readable_id + but different url exists, that resource should be updated with the new url. + """ + unique_url = "https://mit.edu/unique.html" + readable_id = "new_unique_course_id" + platform = LearningResourcePlatformFactory.create(code=PlatformType.ocw.name) + existing_course = LearningResourceFactory.create( + readable_id=readable_id, + url="https://mit.edu/old.html", + platform=platform, + is_course=True, + ) + props = { + "readable_id": readable_id, + "platform": PlatformType.ocw.name, + "offered_by": {"code": OfferedBy.ocw.name}, + "title": "New title", + "url": unique_url, + "description": "something", + "unique_field": "url", + "runs": [ + { + "run_id": "run_id", + "enrollment_start": "2024-01-01T00:00:00Z", + "start_date": "2024-01-20T00:00:00Z", + "end_date": "2024-06-20T00:00:00Z", + } + ], + } + result = load_course(props, [], []) + assert result.readable_id == readable_id + assert result.url == unique_url + assert result.published is True + existing_course.refresh_from_db() + assert existing_course.url == unique_url + assert existing_course.readable_id == readable_id + + @pytest.mark.parametrize("course_exists", [True, False]) def test_load_course_fetch_only(mocker, course_exists): """When fetch_only is True, course should just be fetched from db""" From e60e44915489b77bf1fdd22f4b2146b9b532a13f Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Tue, 29 Oct 2024 14:08:23 -0400 Subject: [PATCH 2/2] Simplify code a bit --- learning_resources/etl/loaders.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/learning_resources/etl/loaders.py b/learning_resources/etl/loaders.py index 9455332de6..9def0aafe3 100644 --- a/learning_resources/etl/loaders.py +++ b/learning_resources/etl/loaders.py @@ -391,22 +391,16 @@ def upsert_course_or_program( # noqa: C901 if existing_courses.count() > 0: for course in existing_courses[1:]: resource_delete_actions(course) - else: - # does a resource with the same readable id already exist? - # if so, update it with the new unique field value - existing_id = LearningResource.objects.filter( - readable_id=resource_data["readable_id"], - platform=platform, - resource_type=LearningResourceType.course.name, - ).exists() - if existing_id: - resource_data[unique_field_name] = unique_field_value - return LearningResource.objects.select_for_update().update_or_create( - readable_id=resource_data.pop("readable_id"), - platform=platform, - resource_type=resource_type, - defaults=resource_data, - ) + # does a resource with the same readable id already exist? + # if so, update it with the new unique field value + elif LearningResource.objects.filter( + readable_id=resource_data["readable_id"], + platform=platform, + resource_type=LearningResourceType.course.name, + ).exists(): + resource_data[unique_field_name] = unique_field_value + unique_field_name = READABLE_ID_FIELD + unique_field_value = resource_data.pop(READABLE_ID_FIELD) return LearningResource.objects.select_for_update().update_or_create( **{unique_field_name: unique_field_value}, platform=platform,