From f12b35e6e511b944384803a0eb3fd6dbcc117d18 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Tue, 31 Oct 2023 16:33:10 -0400 Subject: [PATCH 01/11] Refactor course numbers --- frontends/api/src/generated/api.ts | 12 ++++ learning_resources/etl/constants.py | 7 ++ learning_resources/etl/micromasters.py | 6 ++ learning_resources/etl/mitxonline.py | 6 ++ learning_resources/etl/ocw.py | 9 ++- learning_resources/etl/openedx.py | 8 ++- learning_resources/etl/prolearn.py | 5 +- learning_resources/etl/utils.py | 62 ++++++++++++++-- learning_resources/etl/xpro.py | 8 ++- learning_resources/models.py | 5 +- learning_resources_search/api.py | 9 +++ learning_resources_search/constants.py | 12 ++++ learning_resources_search/serializers.py | 70 ++++++++++++++++--- learning_resources_search/serializers_test.py | 1 - learning_resources_search/tasks.py | 6 +- openapi.yaml | 8 +++ 16 files changed, 209 insertions(+), 25 deletions(-) diff --git a/frontends/api/src/generated/api.ts b/frontends/api/src/generated/api.ts index 66805bd61f..7801da3abb 100644 --- a/frontends/api/src/generated/api.ts +++ b/frontends/api/src/generated/api.ts @@ -274,6 +274,12 @@ export interface Course { * @memberof Course */ extra_course_numbers?: Array | null + /** + * + * @type {{ [key: string]: any; }} + * @memberof Course + */ + course_numbers?: { [key: string]: any } | null } /** * Serializer for the Course model @@ -287,6 +293,12 @@ export interface CourseRequest { * @memberof CourseRequest */ extra_course_numbers?: Array | null + /** + * + * @type {{ [key: string]: any; }} + * @memberof CourseRequest + */ + course_numbers?: { [key: string]: any } | null } /** * Serializer for the LearningPath model diff --git a/learning_resources/etl/constants.py b/learning_resources/etl/constants.py index 0aa542bcb9..3b60a6206c 100644 --- a/learning_resources/etl/constants.py +++ b/learning_resources/etl/constants.py @@ -48,3 +48,10 @@ class ETLSource(Enum): ocw = "ocw" prolearn = "prolearn" podcast = "podcast" + + +class CourseNumberType(Enum): + """Enum of course number types""" + + primary = "Primary" + cross_listed = "Cross-listed" diff --git a/learning_resources/etl/micromasters.py b/learning_resources/etl/micromasters.py index 22001e1195..d62b65c059 100644 --- a/learning_resources/etl/micromasters.py +++ b/learning_resources/etl/micromasters.py @@ -5,6 +5,7 @@ from learning_resources.constants import LearningResourceType, OfferedBy, PlatformType from learning_resources.etl.constants import COMMON_HEADERS, ETLSource +from learning_resources.etl.utils import generate_course_numbers_json from learning_resources.models import LearningResource OFFERED_BY = {"code": OfferedBy.mitx.name} @@ -93,6 +94,11 @@ def transform(programs_data): for run in course["course_runs"] if run.get("edx_course_key", None) ], + "course": { + "course_numbers": generate_course_numbers_json( + course["edx_key"], is_ocw=False + ), + }, } for course in sorted( program["courses"], diff --git a/learning_resources/etl/mitxonline.py b/learning_resources/etl/mitxonline.py index c68e71a14e..e185a2ae99 100644 --- a/learning_resources/etl/mitxonline.py +++ b/learning_resources/etl/mitxonline.py @@ -13,6 +13,7 @@ from learning_resources.etl.constants import ETLSource from learning_resources.etl.utils import ( extract_valid_department_from_id, + generate_course_numbers_json, transform_topics, ) @@ -146,6 +147,11 @@ def _transform_course(course): "runs": [ _transform_run(course_run, course) for course_run in course["courseruns"] ], + "course": { + "course_numbers": generate_course_numbers_json( + course["readable_id"], is_ocw=False + ), + }, "published": bool( parse_page_attribute(course, "page_url") ), # a course is only considered published if it has a page url diff --git a/learning_resources/etl/ocw.py b/learning_resources/etl/ocw.py index cb418bba9f..84f54575fe 100644 --- a/learning_resources/etl/ocw.py +++ b/learning_resources/etl/ocw.py @@ -26,6 +26,7 @@ from learning_resources.etl.constants import ETLSource from learning_resources.etl.utils import ( extract_text_metadata, + generate_course_numbers_json, get_content_type, ) from learning_resources.models import ContentFile, LearningResource @@ -286,7 +287,7 @@ def transform_course(course_data: dict) -> dict: else: extra_course_numbers = [] - readable_id = f"{course_data.get(PRIMARY_COURSE_ID)}+{slugify(course_data.get('term'))}_{course_data.get('year')}" # noqa: E501 + readable_id = f"{course_data[PRIMARY_COURSE_ID]}+{slugify(course_data.get('term'))}_{course_data.get('year')}" # noqa: E501 topics = [ {"name": topic_name} for topic_name in list( @@ -321,7 +322,11 @@ def transform_course(course_data: dict) -> dict: "last_modified": course_data.get("last_modified"), "published": True, "course": { - "extra_course_numbers": extra_course_numbers, + "course_numbers": generate_course_numbers_json( + course_data[PRIMARY_COURSE_ID], + extra_nums=extra_course_numbers, + is_ocw=True, + ), }, "topics": topics, "runs": [transform_run(course_data)], diff --git a/learning_resources/etl/openedx.py b/learning_resources/etl/openedx.py index 573e12b04e..cc6b6fc6bb 100644 --- a/learning_resources/etl/openedx.py +++ b/learning_resources/etl/openedx.py @@ -12,7 +12,10 @@ from learning_resources.constants import LearningResourceType from learning_resources.etl.constants import COMMON_HEADERS -from learning_resources.etl.utils import extract_valid_department_from_id +from learning_resources.etl.utils import ( + extract_valid_department_from_id, + generate_course_numbers_json, +) from learning_resources.utils import get_year_and_semester MIT_OWNER_KEYS = ["MITx", "MITx_PRO"] @@ -262,6 +265,9 @@ def _transform_course(config, course): for course_run in course.get("course_runs", []) if _filter_course_run(course_run) ], + "course": { + "course_numbers": generate_course_numbers_json(course.get("key")), + }, "published": any( run["status"] == "published" for run in course.get("course_runs", []) ), diff --git a/learning_resources/etl/prolearn.py b/learning_resources/etl/prolearn.py index b35cc78dc9..6189ff265a 100644 --- a/learning_resources/etl/prolearn.py +++ b/learning_resources/etl/prolearn.py @@ -11,7 +11,7 @@ from django.conf import settings from learning_resources.etl.constants import ETLSource -from learning_resources.etl.utils import transform_topics +from learning_resources.etl.utils import generate_course_numbers_json, transform_topics from learning_resources.models import LearningResourceOfferor, LearningResourcePlatform log = logging.getLogger(__name__) @@ -316,6 +316,9 @@ def _transform_course( "url": parse_url(course), "image": parse_image(course), "description": course["body"], + "course": { + "course_numbers": generate_course_numbers_json(course["nid"]), + }, "published": True, "topics": parse_topic(course), "runs": _transform_runs(course), diff --git a/learning_resources/etl/utils.py b/learning_resources/etl/utils.py index 67b13cfbf3..3e880ebe73 100644 --- a/learning_resources/etl/utils.py +++ b/learning_resources/etl/utils.py @@ -33,8 +33,13 @@ DEPARTMENTS, VALID_TEXT_FILE_TYPES, ) -from learning_resources.etl.constants import ETLSource -from learning_resources.models import ContentFile, LearningResourceRun +from learning_resources.etl.constants import CourseNumberType, ETLSource +from learning_resources.models import ( + ContentFile, + LearningResourceDepartment, + LearningResourceRun, +) +from learning_resources.serializers import LearningResourceDepartmentSerializer log = logging.getLogger(__name__) @@ -496,7 +501,9 @@ def get_content_type(file_type: str) -> str: return CONTENT_TYPE_FILE -def extract_valid_department_from_id(course_string: str) -> list[str]: +def extract_valid_department_from_id( + course_string: str, is_ocw: bool = False # noqa: FBT001, FBT002 +) -> list[str]: """ Extracts a department from course data and returns @@ -506,8 +513,55 @@ def extract_valid_department_from_id(course_string: str) -> list[str]: Returns: department (str): parsed department string """ # noqa: D401 - department_string = re.search(r"\+([^\.]*)\.", course_string) + num_pattern = r"^(\d+)\.*" if is_ocw else r"\+([^\.]*)\." + department_string = re.search(num_pattern, course_string) if department_string: dept_candidate = department_string.groups()[0] + # Some CMS-W department courses start with 21W, but we want to use CMS-W + if dept_candidate == "21W": + dept_candidate = "CMS-W" return [dept_candidate] if dept_candidate in DEPARTMENTS else [] return [] + + +def generate_course_numbers_json( + course_num: str, + extra_nums: list[str] | None = None, + is_ocw: bool = False, # noqa: FBT001, FBT002 +) -> list[dict]: + """ + Generate a dict containing info on course numbers and departments + + Args: + course_num (str): primary course number + extra_nums (list[str]): list of cross-listed course numbers + is_ocw (bool): whether or not the course is an OCW course + + Returns: + course_number_json (list[dict]): list of dicts containing course number info + + """ + course_number_json = [] + course_numbers = [course_num] + if not extra_nums: + extra_nums = [] + course_numbers.extend(extra_nums) + for idx, num in enumerate(course_numbers): + dept_id = extract_valid_department_from_id(num, is_ocw=is_ocw) + dept = ( + LearningResourceDepartment.objects.filter(department_id=dept_id[0]).first() + if dept_id + else None + ) + course_number_json.append( + { + "value": num, + "listing_type": CourseNumberType.primary.value + if idx == 0 + else CourseNumberType.cross_listed.value, + "department": LearningResourceDepartmentSerializer(instance=dept).data + if dept + else None, + } + ) + return course_number_json diff --git a/learning_resources/etl/xpro.py b/learning_resources/etl/xpro.py index 44f5ff7b79..f5928d35cc 100644 --- a/learning_resources/etl/xpro.py +++ b/learning_resources/etl/xpro.py @@ -9,7 +9,7 @@ from learning_resources.constants import LearningResourceType, OfferedBy, PlatformType from learning_resources.etl.constants import ETLSource -from learning_resources.etl.utils import transform_topics +from learning_resources.etl.utils import generate_course_numbers_json, transform_topics log = logging.getLogger(__name__) @@ -87,7 +87,6 @@ def _transform_learning_resource_course(course): Args: course (dict): course data - xpro_platform_map (dict): dict of xpro platform names to platform values Returns: dict: normalized learning resource data @@ -108,6 +107,11 @@ def _transform_learning_resource_course(course): "topics": transform_topics(course.get("topics", [])), "runs": [_transform_run(course_run) for course_run in course["courseruns"]], "resource_type": LearningResourceType.course.name, + "course": { + "course_numbers": generate_course_numbers_json( + course["readable_id"], is_ocw=False + ), + }, } diff --git a/learning_resources/models.py b/learning_resources/models.py index e705d75fc1..8535815e7d 100644 --- a/learning_resources/models.py +++ b/learning_resources/models.py @@ -3,6 +3,7 @@ from django.contrib.auth.models import User from django.contrib.postgres.fields import ArrayField from django.db import models +from django.db.models import JSONField from learning_resources import constants from learning_resources.constants import ( @@ -232,9 +233,7 @@ class Course(TimestampedModel): on_delete=models.deletion.CASCADE, primary_key=True, ) - extra_course_numbers = ArrayField( - models.CharField(max_length=128), null=True, blank=True - ) + course_numbers = JSONField(null=True, blank=True) @property def runs(self): diff --git a/learning_resources_search/api.py b/learning_resources_search/api.py index a7e763ed32..180a8ecbb2 100644 --- a/learning_resources_search/api.py +++ b/learning_resources_search/api.py @@ -13,6 +13,7 @@ SEARCH_NESTED_FILTERS, TOPICS_QUERY_FIELDS, ) +from learning_resources_search.serializers import OSLearningResourceSerializer SIMILAR_RESOURCE_RELEVANT_FIELDS = ["title", "short_description"] LEARN_SUGGEST_FIELDS = ["title.trigram", "description.trigram"] @@ -326,6 +327,14 @@ def execute_learn_search(search_params): ) search = Search(index=",".join(indexes)) + search = search.source( + fields={ + "excludes": [ + *OSLearningResourceSerializer.SOURCE_EXCLUDED_FIELDS, + ] + } + ) + if search_params.get("offset"): search = search.extra(from_=search_params.get("offset")) diff --git a/learning_resources_search/constants.py b/learning_resources_search/constants.py index 7c584b759b..8795cdf2b5 100644 --- a/learning_resources_search/constants.py +++ b/learning_resources_search/constants.py @@ -119,6 +119,15 @@ class IndexestoUpdate(Enum): }, "offered_by": {"type": "keyword"}, "resource_content_tags": {"type": "keyword"}, + "department_course_numbers": { + "type": "nested", + "properties": { + "coursenum": {"type": "keyword"}, + "sort_coursenum": {"type": "keyword"}, + "department": {"type": "keyword"}, + "primary": {"type": "boolean"}, + }, + }, "runs": { "type": "nested", "properties": { @@ -196,3 +205,6 @@ class IndexestoUpdate(Enum): } SEARCH_CONN_EXCEPTIONS = (ESConnectionError, UrlTimeoutError) +SOURCE_EXCLUSIONS = [ + "department_course_numbers", +] diff --git a/learning_resources_search/serializers.py b/learning_resources_search/serializers.py index 9a212f1a8d..e8aa7aa35a 100644 --- a/learning_resources_search/serializers.py +++ b/learning_resources_search/serializers.py @@ -8,6 +8,7 @@ from rest_framework import serializers from learning_resources.constants import LearningResourceType, OfferedBy, PlatformType +from learning_resources.etl.constants import CourseNumberType from learning_resources.models import LearningResource from learning_resources.serializers import ( LearningResourceSerializer, @@ -17,6 +18,35 @@ log = logging.getLogger() +def get_department_course_number_dict(coursenum_data): + """ + Generate course number dictionary from course number data + """ + department_num = coursenum_data.get("department", {}).get("department_id") + course_num = coursenum_data.get("value") + if department_num[0].isdigit() and len(department_num) == 1: + sort_coursenum = f"0{course_num}" + else: + sort_coursenum = course_num + + return { + "coursenum": course_num, + "department": coursenum_data.get("department", {}).get("name"), + "primary": coursenum_data.get("listing_type") == CourseNumberType.primary.value, + "sort_coursenum": sort_coursenum, + } + + +def customize_resource_data_for_search(data: dict) -> dict: + """Add any special search-related fields to the serializer data here""" + if data.get("resource_type") == LearningResourceType.course.name: + data["department_course_numbers"] = [ + get_department_course_number_dict(coursenum) + for coursenum in data["course"]["course_numbers"] + ] + return data + + def extract_values(obj, key): """ Pull all values of specified key from nested JSON. @@ -259,10 +289,12 @@ def serialize_course_for_bulk(learning_resource_obj): Args: learning_resource_obj (LearningResource): A course learning resource """ - return { - "_id": learning_resource_obj.id, - **LearningResourceSerializer(learning_resource_obj).data, - } + return customize_resource_data_for_search( + { + "_id": learning_resource_obj.id, + **LearningResourceSerializer(learning_resource_obj).data, + } + ) def serialize_bulk_programs(ids): @@ -294,10 +326,12 @@ def serialize_program_for_bulk(learning_resource_obj): Args: learning_resource_obj (LearningResource): A program learning_resource object """ - return { - "_id": learning_resource_obj.id, - **LearningResourceSerializer(learning_resource_obj).data, - } + return customize_resource_data_for_search( + { + "_id": learning_resource_obj.id, + **LearningResourceSerializer(learning_resource_obj).data, + } + ) def serialize_for_deletion(opensearch_object_id): @@ -311,3 +345,23 @@ def serialize_for_deletion(opensearch_object_id): dict: the object deletion data """ return {"_id": opensearch_object_id, "_op_type": "delete"} + + +class OSLearningResourceSerializer(LearningResourceSerializer): + """ + Serializer for indexing learning resources. + Adds custom fields needed for search purposes. + Any fields added here should also be added to SOURCE_EXCLUDED_FIELDS + """ + + SOURCE_EXCLUDED_FIELDS = ["department_course_numbers"] + + department_course_numbers = serializers.SerializerMethodField() + + def get_department_course_numbers(self, instance): + if instance.resource_type == LearningResourceType.course.name: + return [ + get_department_course_number_dict(coursenum_data) + for coursenum_data in instance.course.course_numbers + ] + return [] diff --git a/learning_resources_search/serializers_test.py b/learning_resources_search/serializers_test.py index 69885ee017..bb0384a7d0 100644 --- a/learning_resources_search/serializers_test.py +++ b/learning_resources_search/serializers_test.py @@ -211,7 +211,6 @@ def test_learning_resources_search_response_serializer(settings): "professional": True, "certification": "Certificates", "prices": [2250.0], - "course": {"extra_course_numbers": None}, "learning_path": None, "podcast": None, "podcast_episode": None, diff --git a/learning_resources_search/tasks.py b/learning_resources_search/tasks.py index bcc89aa363..ac91ef3a47 100644 --- a/learning_resources_search/tasks.py +++ b/learning_resources_search/tasks.py @@ -12,7 +12,6 @@ from opensearchpy.exceptions import NotFoundError, RequestError from learning_resources.models import Course, LearningResource, Program -from learning_resources.serializers import LearningResourceSerializer from learning_resources.utils import load_course_blocklist from learning_resources_search import indexing_api as api from learning_resources_search.constants import ( @@ -22,6 +21,7 @@ IndexestoUpdate, ) from learning_resources_search.exceptions import ReindexError, RetryError +from learning_resources_search.serializers import OSLearningResourceSerializer from open_discussions.celery import app from open_discussions.utils import chunks, merge_strings @@ -50,7 +50,7 @@ def deindex_document(doc_id, object_type, **kwargs): def upsert_course(course_id): """Upsert course based on stored database information""" course_obj = LearningResource.objects.get(id=course_id) - course_data = LearningResourceSerializer(course_obj).data + course_data = OSLearningResourceSerializer(course_obj).data api.upsert_document( course_id, course_data, @@ -64,7 +64,7 @@ def upsert_program(program_id): """Upsert program based on stored database information""" program_obj = Program.objects.get(learning_resource_id=program_id) - program_data = LearningResourceSerializer(program_obj.learning_resource).data + program_data = OSLearningResourceSerializer(program_obj.learning_resource).data api.upsert_document( program_obj.learning_resource.id, program_data, diff --git a/openapi.yaml b/openapi.yaml index 8b93c1ab0d..235cafc262 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -5451,6 +5451,10 @@ components: type: string maxLength: 128 nullable: true + course_numbers: + type: object + additionalProperties: {} + nullable: true CourseRequest: type: object description: Serializer for the Course model @@ -5462,6 +5466,10 @@ components: minLength: 1 maxLength: 128 nullable: true + course_numbers: + type: object + additionalProperties: {} + nullable: true LearningPath: type: object description: Serializer for the LearningPath model From 1c2636d41cc4ba14d459421086b8f3486785be3f Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Wed, 1 Nov 2023 14:27:25 -0400 Subject: [PATCH 02/11] Fix tests --- learning_resources/etl/loaders_test.py | 22 +++--- learning_resources/etl/micromasters.py | 6 -- learning_resources/etl/mitxonline_test.py | 21 ++++- learning_resources/etl/ocw_test.py | 25 +++++- learning_resources/etl/openedx_test.py | 14 +++- learning_resources/etl/pipelines_test.py | 7 +- learning_resources/etl/prolearn_test.py | 11 ++- learning_resources/etl/utils.py | 14 ++-- learning_resources/etl/xpro_test.py | 20 ++++- learning_resources/factories.py | 49 ++++++++---- learning_resources/models_test.py | 6 +- learning_resources/serializers_test.py | 12 ++- learning_resources_search/api_test.py | 2 + learning_resources_search/serializers.py | 7 +- learning_resources_search/serializers_test.py | 79 +++++++++++++++++-- learning_resources_search/tasks_test.py | 23 ++---- 16 files changed, 232 insertions(+), 86 deletions(-) diff --git a/learning_resources/etl/loaders_test.py b/learning_resources/etl/loaders_test.py index 5eb270d4d5..ed555418dd 100644 --- a/learning_resources/etl/loaders_test.py +++ b/learning_resources/etl/loaders_test.py @@ -123,12 +123,12 @@ def test_load_program( platform = LearningResourcePlatformFactory.create() program = ( - ProgramFactory.create(courses=[], platform=platform) + ProgramFactory.create(courses=[], platform=platform.platform) if program_exists - else ProgramFactory.build(courses=[], platform=platform) + else ProgramFactory.build(courses=[], platform=platform.platform) ) - LearningResourcePlatformFactory.create(platform=platform) + LearningResourcePlatformFactory.create(platform=platform.platform) if program_exists: learning_resource = program.learning_resource @@ -138,16 +138,16 @@ def test_load_program( learning_resource.save() courses = ( - CourseFactory.create_batch(2, platform=platform) + CourseFactory.create_batch(2, platform=platform.platform) if courses_exist - else CourseFactory.build_batch(2, platform=platform) + else CourseFactory.build_batch(2, platform=platform.platform) ) before_course_count = len(courses) if courses_exist else 0 after_course_count = len(courses) if program_exists and has_retired_course: - course = CourseFactory.create(platform=platform) + course = CourseFactory.create(platform=platform.platform) before_course_count += 1 after_course_count += 1 program.learning_resource.resources.set( @@ -262,9 +262,9 @@ def test_load_course( # noqa: PLR0913 platform = LearningResourcePlatformFactory.create() course = ( - CourseFactory.create(runs=[], platform=platform) + CourseFactory.create(runs=[], platform=platform.platform) if course_exists - else CourseFactory.build(runs=[], platform=platform) + else CourseFactory.build(runs=[], platform=platform.platform) ) learning_resource = course.learning_resource @@ -283,7 +283,7 @@ def test_load_course( # noqa: PLR0913 props = { "readable_id": learning_resource.readable_id, - "platform": platform, + "platform": platform.platform, "professional": True, "title": learning_resource.title, "image": {"url": learning_resource.image.url}, @@ -375,13 +375,13 @@ def test_load_duplicate_course( platform = LearningResourcePlatformFactory.create() course = ( - CourseFactory.create(runs=[], platform=platform) + CourseFactory.create(runs=[], platform=platform.platform) if course_exists else CourseFactory.build() ) duplicate_course = ( - CourseFactory.create(runs=[], platform=platform) + CourseFactory.create(runs=[], platform=platform.platform) if duplicate_course_exists else CourseFactory.build() ) diff --git a/learning_resources/etl/micromasters.py b/learning_resources/etl/micromasters.py index d62b65c059..22001e1195 100644 --- a/learning_resources/etl/micromasters.py +++ b/learning_resources/etl/micromasters.py @@ -5,7 +5,6 @@ from learning_resources.constants import LearningResourceType, OfferedBy, PlatformType from learning_resources.etl.constants import COMMON_HEADERS, ETLSource -from learning_resources.etl.utils import generate_course_numbers_json from learning_resources.models import LearningResource OFFERED_BY = {"code": OfferedBy.mitx.name} @@ -94,11 +93,6 @@ def transform(programs_data): for run in course["course_runs"] if run.get("edx_course_key", None) ], - "course": { - "course_numbers": generate_course_numbers_json( - course["edx_key"], is_ocw=False - ), - }, } for course in sorted( program["courses"], diff --git a/learning_resources/etl/mitxonline_test.py b/learning_resources/etl/mitxonline_test.py index e9c675e0c5..5a401c7dc4 100644 --- a/learning_resources/etl/mitxonline_test.py +++ b/learning_resources/etl/mitxonline_test.py @@ -4,13 +4,14 @@ # pylint: disable=redefined-outer-name from datetime import datetime from itertools import chain +from unittest.mock import ANY from urllib.parse import urljoin import pytest from learning_resources.constants import LearningResourceType, PlatformType from learning_resources.etl import mitxonline -from learning_resources.etl.constants import ETLSource +from learning_resources.etl.constants import CourseNumberType, ETLSource from learning_resources.etl.mitxonline import ( _transform_image, parse_page_attribute, @@ -194,6 +195,15 @@ def test_mitxonline_transform_programs(mock_mitxonline_programs_data): } for course_run_data in course_data["courseruns"] ], + "course": { + "course_numbers": [ + { + "value": course_data["readable_id"], + "department": ANY, + "listing_type": CourseNumberType.primary.value, + } + ] + }, } for course_data in program_data["courses"] if "PROCTORED EXAM" not in course_data["title"] @@ -271,6 +281,15 @@ def test_mitxonline_transform_courses(settings, mock_mitxonline_courses_data): } for course_run_data in course_data["courseruns"] ], + "course": { + "course_numbers": [ + { + "value": course_data["readable_id"], + "department": ANY, + "listing_type": CourseNumberType.primary.value, + } + ] + }, } for course_data in mock_mitxonline_courses_data if "PROCTORED EXAM" not in course_data["title"] diff --git a/learning_resources/etl/ocw_test.py b/learning_resources/etl/ocw_test.py index 721f635306..bb15a3e7e3 100644 --- a/learning_resources/etl/ocw_test.py +++ b/learning_resources/etl/ocw_test.py @@ -9,7 +9,8 @@ from moto import mock_s3 from learning_resources.conftest import OCW_TEST_PREFIX, setup_s3_ocw -from learning_resources.etl.constants import ETLSource +from learning_resources.constants import DEPARTMENTS +from learning_resources.etl.constants import CourseNumberType, ETLSource from learning_resources.etl.ocw import ( transform_content_files, transform_contentfile, @@ -195,8 +196,26 @@ def test_transform_course(settings, legacy_uid, site_uid, expected_uid, has_extr assert transformed_json["image"]["alt"] == ( "Illustration of an aircraft wing showing connections between the disciplines of the course." ) - assert transformed_json["course"]["extra_course_numbers"] == ( - ["1", "2"] if has_extra_num else [] + assert transformed_json["course"]["course_numbers"][0] == { + "value": "16.01", + "department": {"department_id": "16", "name": DEPARTMENTS["16"]}, + "listing_type": CourseNumberType.primary.value, + } + assert transformed_json["course"]["course_numbers"][1:] == ( + [ + { + "value": "1", + "department": {"department_id": "1", "name": DEPARTMENTS["1"]}, + "listing_type": CourseNumberType.cross_listed.value, + }, + { + "value": "2", + "department": {"department_id": "2", "name": DEPARTMENTS["2"]}, + "listing_type": CourseNumberType.cross_listed.value, + }, + ] + if has_extra_num + else [] ) else: assert transformed_json is None diff --git a/learning_resources/etl/openedx_test.py b/learning_resources/etl/openedx_test.py index dbee39e75f..1427551e98 100644 --- a/learning_resources/etl/openedx_test.py +++ b/learning_resources/etl/openedx_test.py @@ -6,7 +6,7 @@ import pytest from learning_resources.constants import LearningResourceType -from learning_resources.etl.constants import COMMON_HEADERS +from learning_resources.etl.constants import COMMON_HEADERS, CourseNumberType from learning_resources.etl.openedx import ( OpenEdxConfiguration, openedx_extract_transform_factory, @@ -170,6 +170,18 @@ def test_transform_course( # noqa: PLR0913 "published": True, } ], + "course": { + "course_numbers": [ + { + "value": "MITx+15.071x", + "department": { + "department_id": "15", + "name": "Sloan School of Management", + }, + "listing_type": CourseNumberType.primary.value, + } + ] + }, } if not is_course_run_deleted: assert transformed_courses[1]["published"] is False diff --git a/learning_resources/etl/pipelines_test.py b/learning_resources/etl/pipelines_test.py index 4583f3d7de..baef1a5d0b 100644 --- a/learning_resources/etl/pipelines_test.py +++ b/learning_resources/etl/pipelines_test.py @@ -208,7 +208,12 @@ def test_ocw_courses_etl(settings, mocker): resource = LearningResource.objects.first() assert resource.readable_id == "16.01+fall_2005" - assert resource.course.extra_course_numbers == ["16.02", "16.03", "16.04"] + assert [num["value"] for num in resource.course.course_numbers] == [ + "16.01", + "16.02", + "16.03", + "16.04", + ] assert resource.platform.platform == PlatformType.ocw.name assert resource.offered_by.code == OfferedBy.ocw.name assert resource.departments.first().department_id == "16" diff --git a/learning_resources/etl/prolearn_test.py b/learning_resources/etl/prolearn_test.py index 3633ee8428..ff0e7e8585 100644 --- a/learning_resources/etl/prolearn_test.py +++ b/learning_resources/etl/prolearn_test.py @@ -7,7 +7,7 @@ import pytz from learning_resources.etl import prolearn -from learning_resources.etl.constants import ETLSource +from learning_resources.etl.constants import CourseNumberType, ETLSource from learning_resources.etl.prolearn import ( get_offered_by, parse_date, @@ -204,6 +204,15 @@ def test_prolearn_transform_courses(mock_mitpe_courses_data): course["start_value"], course["end_value"] ) ], + "course": { + "course_numbers": [ + { + "value": str(course["nid"]), + "department": None, + "listing_type": CourseNumberType.primary.value, + } + ] + }, } for course in extracted_data ] diff --git a/learning_resources/etl/utils.py b/learning_resources/etl/utils.py index 3e880ebe73..f447331ba9 100644 --- a/learning_resources/etl/utils.py +++ b/learning_resources/etl/utils.py @@ -36,10 +36,8 @@ from learning_resources.etl.constants import CourseNumberType, ETLSource from learning_resources.models import ( ContentFile, - LearningResourceDepartment, LearningResourceRun, ) -from learning_resources.serializers import LearningResourceDepartmentSerializer log = logging.getLogger(__name__) @@ -548,19 +546,17 @@ def generate_course_numbers_json( course_numbers.extend(extra_nums) for idx, num in enumerate(course_numbers): dept_id = extract_valid_department_from_id(num, is_ocw=is_ocw) - dept = ( - LearningResourceDepartment.objects.filter(department_id=dept_id[0]).first() - if dept_id - else None - ) course_number_json.append( { "value": num, "listing_type": CourseNumberType.primary.value if idx == 0 else CourseNumberType.cross_listed.value, - "department": LearningResourceDepartmentSerializer(instance=dept).data - if dept + "department": { + "department_id": dept_id[0], + "name": DEPARTMENTS[dept_id[0]], + } + if dept_id else None, } ) diff --git a/learning_resources/etl/xpro_test.py b/learning_resources/etl/xpro_test.py index 447f13fecc..a0c60f766a 100644 --- a/learning_resources/etl/xpro_test.py +++ b/learning_resources/etl/xpro_test.py @@ -9,7 +9,7 @@ from learning_resources.constants import LearningResourceType, PlatformType from learning_resources.etl import xpro -from learning_resources.etl.constants import ETLSource +from learning_resources.etl.constants import CourseNumberType, ETLSource from learning_resources.etl.utils import UCC_TOPIC_MAPPINGS from open_discussions.test_utils import any_instance_of @@ -162,6 +162,15 @@ def test_xpro_transform_programs(mock_xpro_programs_data): } for course_run_data in course_data["courseruns"] ], + "course": { + "course_numbers": [ + { + "value": course_data["readable_id"], + "department": None, + "listing_type": CourseNumberType.primary.value, + } + ] + }, } for course_data in program_data["courses"] ], @@ -218,6 +227,15 @@ def test_xpro_transform_courses(mock_xpro_courses_data): } for course_run_data in course_data["courseruns"] ], + "course": { + "course_numbers": [ + { + "value": course_data["readable_id"], + "department": None, + "listing_type": CourseNumberType.primary.value, + } + ] + }, } for course_data in mock_xpro_courses_data ] diff --git a/learning_resources/factories.py b/learning_resources/factories.py index 6b6cfa7cf3..dbbc0c2e7b 100644 --- a/learning_resources/factories.py +++ b/learning_resources/factories.py @@ -100,7 +100,7 @@ class LearningResourcePlatformFactory(DjangoModelFactory): """Factory for LearningResourcePlatform""" platform = FuzzyChoice([platform.name for platform in constants.PlatformType]) - name = FuzzyChoice([platform.value for platform in constants.PlatformType]) + name = factory.LazyAttribute(lambda o: constants.PlatformType[o.platform].value) is_edx = Faker("boolean") has_content_files = Faker("boolean") @@ -123,8 +123,8 @@ class Meta: class LearningResourceOfferorFactory(DjangoModelFactory): """Factory for LearningResourceOfferor""" - name = FuzzyChoice([offeror.value for offeror in constants.OfferedBy]) code = FuzzyChoice([offeror.name for offeror in constants.OfferedBy]) + name = factory.LazyAttribute(lambda o: constants.OfferedBy[o.code].value) professional = Faker("boolean") class Meta: @@ -188,7 +188,15 @@ class CourseFactory(DjangoModelFactory): LearningResourceFactory, resource_type=constants.LearningResourceType.course.name, ) - extra_course_numbers = factory.List([]) + course_numbers = factory.List( + [ + { + "value": f"{random.randint(1,20)}.0001", # noqa: S311 + "department": None, + "listing_type": "Primary", + } + ] + ) @factory.post_generation def runs(self, create, extracted, **kwargs): # noqa: ARG002 @@ -210,28 +218,28 @@ def platform(self, create, extracted, **kwargs): # noqa: ARG002 return self.learning_resource.platform = LearningResourcePlatformFactory.create( - platform=extracted + platform=extracted, name=constants.PlatformType[extracted].value ) self.learning_resource.save() @factory.post_generation - def etl_source(self, create, extracted, **kwargs): # noqa: ARG002 - """Create etl_source for course.learning_resource""" + def offered_by(self, create, extracted, **kwargs): # noqa: ARG002 + """Create LearningResourceOfferor for course.learning_resource""" if not create or not extracted: return - self.learning_resource.etl_source = extracted + self.learning_resource.offered_by = LearningResourceOfferorFactory.create( + code=extracted, name=constants.OfferedBy[extracted].value + ) self.learning_resource.save() @factory.post_generation - def offered_by(self, create, extracted, **kwargs): # noqa: ARG002 - """Create LearningResourceOfferor for course.learning_resource""" + def etl_source(self, create, extracted, **kwargs): # noqa: ARG002 + """Create etl_source for course.learning_resource""" if not create or not extracted: return - self.learning_resource.offered_by = LearningResourceOfferorFactory.create( - code=extracted - ) + self.learning_resource.etl_source = extracted self.learning_resource.save() @factory.post_generation @@ -394,11 +402,24 @@ def runs(self, create, extracted, **kwargs): # noqa: ARG002 @factory.post_generation def platform(self, create, extracted, **kwargs): # noqa: ARG002 - """Create platform for program.learning_resource""" + """Create platform for course.learning_resource""" + if not create or not extracted: + return + + self.learning_resource.platform = LearningResourcePlatformFactory.create( + platform=extracted, name=constants.PlatformType[extracted].value + ) + self.learning_resource.save() + + @factory.post_generation + def offered_by(self, create, extracted, **kwargs): # noqa: ARG002 + """Create LearningResourceOfferor for course.learning_resource""" if not create or not extracted: return - self.learning_resource.platform = extracted + self.learning_resource.offered_by = LearningResourceOfferorFactory.create( + code=extracted, name=constants.OfferedBy[extracted].value + ) self.learning_resource.save() @factory.post_generation diff --git a/learning_resources/models_test.py b/learning_resources/models_test.py index 28a6ba2094..ff30d7cce4 100644 --- a/learning_resources/models_test.py +++ b/learning_resources/models_test.py @@ -87,12 +87,12 @@ def test_course_creation(): ) def test_lr_certification(offered_by, availability, has_cert): """The certification property should return the expected value""" - offered_by = LearningResourceOfferorFactory.create(name=offered_by) + offered_by = LearningResourceOfferorFactory.create(code=offered_by) course = CourseFactory.create( - offered_by=offered_by, + offered_by=offered_by.code, runs=[], - is_professional=(has_cert and offered_by != constants.OfferedBy.mitx.value), + is_professional=(has_cert and offered_by != constants.OfferedBy.mitx.name), ) assert course.learning_resource.certification == ( diff --git a/learning_resources/serializers_test.py b/learning_resources/serializers_test.py index 90c1b2dafb..bdcbc993be 100644 --- a/learning_resources/serializers_test.py +++ b/learning_resources/serializers_test.py @@ -21,10 +21,8 @@ def test_serialize_course_model(): """ Verify that a serialized course contains attributes for related objects """ - course = factories.CourseFactory.create() - serializer = serializers.LearningResourceSerializer( - instance=course.learning_resource - ) + course_resource = factories.CourseFactory.create().learning_resource + serializer = serializers.LearningResourceSerializer(instance=course_resource) assert len(serializer.data["topics"]) > 0 assert "name" in serializer.data["topics"][0] assert len(serializer.data["runs"]) == 2 @@ -35,9 +33,9 @@ def test_serialize_course_model(): assert serializer.data["departments"][0] is not None assert serializer.data["platform"] is not None assert ( - serializer.data["course"] == serializers.CourseSerializer(instance=course).data + serializer.data["course"] + == serializers.CourseSerializer(instance=course_resource.course).data ) - assert serializer.data["course"]["extra_course_numbers"] is not None assert serializer.data["program"] is None @@ -189,7 +187,7 @@ def test_content_file_serializer(): "content_title": "test title", "section": "test section", } - platform = constants.PlatformType.xpro.value + platform = constants.PlatformType.xpro.name course = factories.CourseFactory.create(platform=platform) content_file = factories.ContentFileFactory.create( run=course.learning_resource.runs.first(), **content_kwargs diff --git a/learning_resources_search/api_test.py b/learning_resources_search/api_test.py index 0a70abfce8..44e102b634 100644 --- a/learning_resources_search/api_test.py +++ b/learning_resources_search/api_test.py @@ -12,6 +12,7 @@ generate_text_clause, relevant_indexes, ) +from learning_resources_search.serializers import OSLearningResourceSerializer @pytest.mark.parametrize( @@ -513,6 +514,7 @@ def test_execute_learn_search(opensearch): } query = { + "_source": {"excludes": OSLearningResourceSerializer.SOURCE_EXCLUDED_FIELDS}, "query": { "bool": { "should": [ diff --git a/learning_resources_search/serializers.py b/learning_resources_search/serializers.py index e8aa7aa35a..b980c8fc8e 100644 --- a/learning_resources_search/serializers.py +++ b/learning_resources_search/serializers.py @@ -22,16 +22,17 @@ def get_department_course_number_dict(coursenum_data): """ Generate course number dictionary from course number data """ - department_num = coursenum_data.get("department", {}).get("department_id") + department_data = coursenum_data.get("department", {}) + department_num = department_data.get("department_id") if department_data else None course_num = coursenum_data.get("value") - if department_num[0].isdigit() and len(department_num) == 1: + if department_num and department_num[0].isdigit() and len(department_num) == 1: sort_coursenum = f"0{course_num}" else: sort_coursenum = course_num return { "coursenum": course_num, - "department": coursenum_data.get("department", {}).get("name"), + "department": department_data.get("name") if department_data else None, "primary": coursenum_data.get("listing_type") == CourseNumberType.primary.value, "sort_coursenum": sort_coursenum, } diff --git a/learning_resources_search/serializers_test.py b/learning_resources_search/serializers_test.py index bb0384a7d0..f32c8512e1 100644 --- a/learning_resources_search/serializers_test.py +++ b/learning_resources_search/serializers_test.py @@ -1,11 +1,11 @@ """Tests for opensearch serializers""" - - import pytest from django.http import QueryDict from rest_framework.renderers import JSONRenderer from learning_resources import factories +from learning_resources.constants import DEPARTMENTS +from learning_resources.etl.constants import CourseNumberType from learning_resources.models import Course, Program from learning_resources.serializers import LearningResourceSerializer from learning_resources_search import serializers @@ -35,14 +35,54 @@ def test_serialize_bulk_courses(mocker): @pytest.mark.django_db() -def test_serialize_course_for_bulk(): +@pytest.mark.parametrize( + ("readable_id", "sort_course_num"), [("1", "01"), ("15", "15"), ("CMS-W", "CMS-W")] +) +@pytest.mark.parametrize( + ("extra_num", "sorted_extra_num"), [("2", "02"), ("16", "16"), ("CC", "CC")] +) +def test_serialize_course_for_bulk( + readable_id, sort_course_num, extra_num, sorted_extra_num +): """ Test that serialize_course_for_bulk yields a valid LearningResourceSerializer """ - course = factories.CourseFactory.create() - assert serializers.serialize_course_for_bulk(course.learning_resource) == { - "_id": course.learning_resource.id, - **LearningResourceSerializer(course.learning_resource).data, + course_numbers = [ + { + "value": readable_id, + "listing_type": CourseNumberType.primary.value, + "department": { + "department_id": readable_id, + "name": DEPARTMENTS[readable_id], + }, + }, + { + "value": extra_num, + "listing_type": CourseNumberType.cross_listed.value, + "department": {"department_id": extra_num, "name": DEPARTMENTS[extra_num]}, + }, + ] + resource = factories.CourseFactory.create( + course_numbers=course_numbers + ).learning_resource + assert resource.course.course_numbers == course_numbers + assert serializers.serialize_course_for_bulk(resource) == { + "_id": resource.id, + "department_course_numbers": [ + { + "coursenum": readable_id, + "department": DEPARTMENTS[readable_id], + "primary": True, + "sort_coursenum": sort_course_num, + }, + { + "coursenum": extra_num, + "department": DEPARTMENTS[extra_num], + "primary": False, + "sort_coursenum": sorted_extra_num, + }, + ], + **LearningResourceSerializer(resource).data, } @@ -240,6 +280,18 @@ def test_learning_resources_search_response_serializer(settings): "checksum": None, } ], + "course": { + "course_numbers": [ + { + "value": "1.001", + "department": { + "department_id": "1", + "name": DEPARTMENTS["1"], + }, + "listing_type": "Primary", + } + ] + }, "image": { "id": 16, "url": "https://xpro-app-production.s3.amazonaws.com/original_images/MCPO-800x500.jpg", @@ -327,7 +379,6 @@ def test_learning_resources_search_response_serializer(settings): "professional": True, "certification": "Certificates", "prices": [2250.0], - "course": {"extra_course_numbers": None}, "learning_path": None, "podcast": None, "podcast_episode": None, @@ -357,6 +408,18 @@ def test_learning_resources_search_response_serializer(settings): "checksum": None, } ], + "course": { + "course_numbers": [ + { + "value": "1.001", + "department": { + "department_id": "1", + "name": DEPARTMENTS["1"], + }, + "listing_type": "Primary", + } + ] + }, "image": { "id": 16, "url": "https://xpro-app-production.s3.amazonaws.com/original_images/MCPO-800x500.jpg", diff --git a/learning_resources_search/tasks_test.py b/learning_resources_search/tasks_test.py index 9ca117f066..e90dbff790 100644 --- a/learning_resources_search/tasks_test.py +++ b/learning_resources_search/tasks_test.py @@ -10,11 +10,8 @@ from learning_resources.constants import PlatformType from learning_resources.factories import ( CourseFactory, - LearningResourcePlatformFactory, ProgramFactory, ) -from learning_resources.models import LearningResourcePlatform -from learning_resources.serializers import LearningResourceSerializer from learning_resources_search import tasks from learning_resources_search.constants import ( COURSE_TYPE, @@ -23,6 +20,7 @@ IndexestoUpdate, ) from learning_resources_search.exceptions import ReindexError, RetryError +from learning_resources_search.serializers import OSLearningResourceSerializer from learning_resources_search.tasks import ( deindex_document, finish_recreate_index, @@ -59,7 +57,7 @@ def test_upsert_course_task(mocked_api): """Test that upsert_course will serialize the course data and upsert it to the ES index""" course = CourseFactory.create() upsert_course(course.learning_resource_id) - data = LearningResourceSerializer(course.learning_resource).data + data = OSLearningResourceSerializer(course.learning_resource).data mocked_api.upsert_document.assert_called_once_with( course.learning_resource.id, data, @@ -72,7 +70,7 @@ def test_upsert_program_task(mocked_api): """Test that upsert_program will serialize the video data and upsert it to the ES index""" program = ProgramFactory.create() upsert_program(program) - data = LearningResourceSerializer(program.learning_resource).data + data = OSLearningResourceSerializer(program.learning_resource).data mocked_api.upsert_document.assert_called_once_with( program.learning_resource.id, data, @@ -318,7 +316,7 @@ def test_bulk_deletion_tasks(mocker, with_error, tasks_func_name, indexing_func_ None, ), (["course"], None), - (["course"], PlatformType.xpro.value), + (["course"], PlatformType.xpro.name), ], ) def test_start_update_index( @@ -342,23 +340,14 @@ def test_start_update_index( if COURSE_TYPE in indexes: courses = sorted( - [ - CourseFactory.create( - platform=LearningResourcePlatformFactory.create( - platform=platform.value - ) - ) - for platform in platforms - ], + [CourseFactory.create(platform=platform.name) for platform in platforms], key=lambda course: course.learning_resource_id, ) unpublished_courses = sorted( [ CourseFactory.create( - platform=LearningResourcePlatform.objects.get( - platform=platform.value - ), + platform=platform.name, is_unpublished=True, ) for platform in platforms From dce39a129298f4ab0906d778cbef192e113ee0ec Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Wed, 1 Nov 2023 14:32:58 -0400 Subject: [PATCH 03/11] Include migrations --- .../migrations/0023_course_course_numbers.py | 40 +++++++++++++++++++ ...0024_remove_course_extra_course_numbers.py | 16 ++++++++ 2 files changed, 56 insertions(+) create mode 100644 learning_resources/migrations/0023_course_course_numbers.py create mode 100644 learning_resources/migrations/0024_remove_course_extra_course_numbers.py diff --git a/learning_resources/migrations/0023_course_course_numbers.py b/learning_resources/migrations/0023_course_course_numbers.py new file mode 100644 index 0000000000..c60009d29e --- /dev/null +++ b/learning_resources/migrations/0023_course_course_numbers.py @@ -0,0 +1,40 @@ +# Generated by Django 4.1.10 on 2023-10-31 15:06 + +from django.db import migrations, models + +from learning_resources.etl.constants import ETLSource +from learning_resources.etl.utils import generate_course_numbers_json + + +def set_course_numbers(apps, schema_editor): + """ + Save course_numbers for every course + """ + Course = apps.get_model("learning_resources", "Course") + for course in Course.objects.select_related("learning_resource").iterator(): + is_ocw = course.learning_resource.etl_source == ETLSource.ocw.name + course.course_numbers = generate_course_numbers_json( + course.learning_resource.readable_id.split("+")[0] + if is_ocw + else course.learning_resource.readable_id, + extra_nums=course.extra_course_numbers, + is_ocw=is_ocw, + ) + course.save() + + +class Migration(migrations.Migration): + dependencies = [ + ("learning_resources", "0022_alter_learningresource_resource_type"), + ] + + operations = [ + migrations.AddField( + model_name="course", + name="course_numbers", + field=models.JSONField(blank=True, null=True), + ), + migrations.RunPython( + set_course_numbers, reverse_code=migrations.RunPython.noop + ), + ] diff --git a/learning_resources/migrations/0024_remove_course_extra_course_numbers.py b/learning_resources/migrations/0024_remove_course_extra_course_numbers.py new file mode 100644 index 0000000000..6b95e80d89 --- /dev/null +++ b/learning_resources/migrations/0024_remove_course_extra_course_numbers.py @@ -0,0 +1,16 @@ +# Generated by Django 4.1.10 on 2023-10-31 18:52 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("learning_resources", "0023_course_course_numbers"), + ] + + operations = [ + migrations.RemoveField( + model_name="course", + name="extra_course_numbers", + ), + ] From d177f728816eacd67ddb69f1c2bf31520e79e1a4 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Wed, 1 Nov 2023 15:12:50 -0400 Subject: [PATCH 04/11] openapi spec --- frontends/api/src/generated/api.ts | 12 ------------ openapi.yaml | 13 ------------- 2 files changed, 25 deletions(-) diff --git a/frontends/api/src/generated/api.ts b/frontends/api/src/generated/api.ts index 7801da3abb..3658de19ef 100644 --- a/frontends/api/src/generated/api.ts +++ b/frontends/api/src/generated/api.ts @@ -268,12 +268,6 @@ export type ContentTypeEnum = * @interface Course */ export interface Course { - /** - * - * @type {Array} - * @memberof Course - */ - extra_course_numbers?: Array | null /** * * @type {{ [key: string]: any; }} @@ -287,12 +281,6 @@ export interface Course { * @interface CourseRequest */ export interface CourseRequest { - /** - * - * @type {Array} - * @memberof CourseRequest - */ - extra_course_numbers?: Array | null /** * * @type {{ [key: string]: any; }} diff --git a/openapi.yaml b/openapi.yaml index 235cafc262..3c86d36674 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -5445,12 +5445,6 @@ components: type: object description: Serializer for the Course model properties: - extra_course_numbers: - type: array - items: - type: string - maxLength: 128 - nullable: true course_numbers: type: object additionalProperties: {} @@ -5459,13 +5453,6 @@ components: type: object description: Serializer for the Course model properties: - extra_course_numbers: - type: array - items: - type: string - minLength: 1 - maxLength: 128 - nullable: true course_numbers: type: object additionalProperties: {} From 56156c5eb2f16035f91bdab9c16d4a2d007790c2 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Wed, 1 Nov 2023 15:40:17 -0400 Subject: [PATCH 05/11] try to fix JS error --- frontends/api/src/test-utils/factories/learningResources.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontends/api/src/test-utils/factories/learningResources.ts b/frontends/api/src/test-utils/factories/learningResources.ts index 321212cb5c..4b550d3119 100644 --- a/frontends/api/src/test-utils/factories/learningResources.ts +++ b/frontends/api/src/test-utils/factories/learningResources.ts @@ -139,7 +139,7 @@ const learningResource: Factory = ( certification: faker.lorem.word(), offered_by: faker.lorem.word(), course: { - extra_course_numbers: maybe(() => repeat(faker.lorem.word)) ?? null, + course_numbers: maybe(() => repeat(faker.datatype.json)) ?? [], }, } } else if (type === ResourceTypeEnum.Program) { From 95a09ccfb4a7bee240770669f177b81147ddecbf Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Wed, 1 Nov 2023 16:32:32 -0400 Subject: [PATCH 06/11] Include department_course_numbers among search fields --- learning_resources_search/api.py | 9 ++ learning_resources_search/api_test.py | 90 +++++++++++++++++++ learning_resources_search/constants.py | 7 ++ learning_resources_search/serializers.py | 30 ++----- learning_resources_search/serializers_test.py | 1 + 5 files changed, 115 insertions(+), 22 deletions(-) diff --git a/learning_resources_search/api.py b/learning_resources_search/api.py index 180a8ecbb2..2972513e8d 100644 --- a/learning_resources_search/api.py +++ b/learning_resources_search/api.py @@ -5,6 +5,7 @@ from learning_resources_search.connection import get_default_alias_name from learning_resources_search.constants import ( + COURSE_QUERY_FIELDS, LEARNING_RESOURCE_QUERY_FIELDS, LEARNING_RESOURCE_SEARCH_FILTERS, LEARNING_RESOURCE_TYPES, @@ -99,6 +100,14 @@ def generate_text_clause(text): } } }, + { + "nested": { + "path": "department_course_numbers", + "query": { + query_type: {"query": text, "fields": COURSE_QUERY_FIELDS} + }, + } + }, { "nested": { "path": "runs", diff --git a/learning_resources_search/api_test.py b/learning_resources_search/api_test.py index 44e102b634..faf8d3a35e 100644 --- a/learning_resources_search/api_test.py +++ b/learning_resources_search/api_test.py @@ -72,6 +72,7 @@ def test_generate_text_clause(): "offered_by", "department", "resource_content_tags", + "department_course_numbers", ], } }, @@ -94,6 +95,20 @@ def test_generate_text_clause(): } } }, + { + "nested": { + "path": "department_course_numbers", + "query": { + "multi_match": { + "query": "math", + "fields": [ + "department_course_numbers.coursenum", + "department_course_numbers.department", + ], + } + }, + } + }, { "nested": { "path": "runs", @@ -149,6 +164,7 @@ def test_generate_text_clause(): "offered_by", "department", "resource_content_tags", + "department_course_numbers", ], } }, @@ -165,6 +181,20 @@ def test_generate_text_clause(): "readable_id": {"value": "MATH*", "rewrite": "constant_score"} } }, + { + "nested": { + "path": "department_course_numbers", + "query": { + "multi_match": { + "query": "math", + "fields": [ + "department_course_numbers.coursenum", + "department_course_numbers.department", + ], + } + }, + } + }, { "nested": { "path": "runs", @@ -220,6 +250,7 @@ def test_generate_text_clause(): "offered_by", "department", "resource_content_tags", + "department_course_numbers", ], } }, @@ -242,6 +273,20 @@ def test_generate_text_clause(): } } }, + { + "nested": { + "path": "department_course_numbers", + "query": { + "query_string": { + "query": '"math"', + "fields": [ + "department_course_numbers.coursenum", + "department_course_numbers.department", + ], + } + }, + } + }, { "nested": { "path": "runs", @@ -297,6 +342,7 @@ def test_generate_text_clause(): "offered_by", "department", "resource_content_tags", + "department_course_numbers", ], } }, @@ -316,6 +362,20 @@ def test_generate_text_clause(): "readable_id": {"value": '"MATH"*', "rewrite": "constant_score"} } }, + { + "nested": { + "path": "department_course_numbers", + "query": { + "query_string": { + "query": '"math"', + "fields": [ + "department_course_numbers.coursenum", + "department_course_numbers.department", + ], + } + }, + } + }, { "nested": { "path": "runs", @@ -540,6 +600,7 @@ def test_execute_learn_search(opensearch): "offered_by", "department", "resource_content_tags", + "department_course_numbers", ], } }, @@ -564,6 +625,20 @@ def test_execute_learn_search(opensearch): } } }, + { + "nested": { + "path": "department_course_numbers", + "query": { + "multi_match": { + "query": "math", + "fields": [ + "department_course_numbers.coursenum", + "department_course_numbers.department", + ], + } + }, + } + }, { "nested": { "path": "runs", @@ -620,6 +695,7 @@ def test_execute_learn_search(opensearch): "offered_by", "department", "resource_content_tags", + "department_course_numbers", ], } }, @@ -642,6 +718,20 @@ def test_execute_learn_search(opensearch): } } }, + { + "nested": { + "path": "department_course_numbers", + "query": { + "multi_match": { + "query": "math", + "fields": [ + "department_course_numbers.coursenum", + "department_course_numbers.department", + ], + } + }, + } + }, { "nested": { "path": "runs", diff --git a/learning_resources_search/constants.py b/learning_resources_search/constants.py index 8795cdf2b5..a7086e0b6c 100644 --- a/learning_resources_search/constants.py +++ b/learning_resources_search/constants.py @@ -67,6 +67,7 @@ class IndexestoUpdate(Enum): "topic": "topics.name", "level": "runs.level", "department": "departments.name", + "department_course_numbers": "department_course_numbers.coursenum", } ENGLISH_TEXT_FIELD = { @@ -171,6 +172,7 @@ class IndexestoUpdate(Enum): "offered_by", "department", "resource_content_tags", + "department_course_numbers", ] AGGREGATIONS = [ @@ -187,6 +189,11 @@ class IndexestoUpdate(Enum): TOPICS_QUERY_FIELDS = ["topics.name"] +COURSE_QUERY_FIELDS = [ + "department_course_numbers.coursenum", + "department_course_numbers.department", +] + RUNS_QUERY_FIELDS = [ "runs.year", "runs.semester", diff --git a/learning_resources_search/serializers.py b/learning_resources_search/serializers.py index b980c8fc8e..9f2e03c87f 100644 --- a/learning_resources_search/serializers.py +++ b/learning_resources_search/serializers.py @@ -38,16 +38,6 @@ def get_department_course_number_dict(coursenum_data): } -def customize_resource_data_for_search(data: dict) -> dict: - """Add any special search-related fields to the serializer data here""" - if data.get("resource_type") == LearningResourceType.course.name: - data["department_course_numbers"] = [ - get_department_course_number_dict(coursenum) - for coursenum in data["course"]["course_numbers"] - ] - return data - - def extract_values(obj, key): """ Pull all values of specified key from nested JSON. @@ -290,12 +280,10 @@ def serialize_course_for_bulk(learning_resource_obj): Args: learning_resource_obj (LearningResource): A course learning resource """ - return customize_resource_data_for_search( - { - "_id": learning_resource_obj.id, - **LearningResourceSerializer(learning_resource_obj).data, - } - ) + return { + "_id": learning_resource_obj.id, + **OSLearningResourceSerializer(learning_resource_obj).data, + } def serialize_bulk_programs(ids): @@ -327,12 +315,10 @@ def serialize_program_for_bulk(learning_resource_obj): Args: learning_resource_obj (LearningResource): A program learning_resource object """ - return customize_resource_data_for_search( - { - "_id": learning_resource_obj.id, - **LearningResourceSerializer(learning_resource_obj).data, - } - ) + return { + "_id": learning_resource_obj.id, + **OSLearningResourceSerializer(learning_resource_obj).data, + } def serialize_for_deletion(opensearch_object_id): diff --git a/learning_resources_search/serializers_test.py b/learning_resources_search/serializers_test.py index f32c8512e1..503f60fb08 100644 --- a/learning_resources_search/serializers_test.py +++ b/learning_resources_search/serializers_test.py @@ -112,6 +112,7 @@ def test_serialize_program_for_bulk(): program = factories.ProgramFactory.create() assert serializers.serialize_program_for_bulk(program.learning_resource) == { "_id": program.learning_resource.id, + "department_course_numbers": [], **LearningResourceSerializer(program.learning_resource).data, } From 65fa42d39c4c41362f16056c642398a8b289bcde Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Thu, 2 Nov 2023 11:27:01 -0400 Subject: [PATCH 07/11] A fair bit of refactoring --- learning_resources/etl/constants.py | 4 +- learning_resources/etl/prolearn.py | 4 +- learning_resources/etl/prolearn_test.py | 12 +---- learning_resources_search/api.py | 2 +- learning_resources_search/api_test.py | 42 +++++++-------- learning_resources_search/constants.py | 33 +++++++----- .../management/commands/update_index.py | 3 +- learning_resources_search/serializers.py | 54 +++++++++---------- learning_resources_search/serializers_test.py | 30 +++++------ 9 files changed, 84 insertions(+), 100 deletions(-) diff --git a/learning_resources/etl/constants.py b/learning_resources/etl/constants.py index 3b60a6206c..47a6fbdb39 100644 --- a/learning_resources/etl/constants.py +++ b/learning_resources/etl/constants.py @@ -53,5 +53,5 @@ class ETLSource(Enum): class CourseNumberType(Enum): """Enum of course number types""" - primary = "Primary" - cross_listed = "Cross-listed" + primary = "primary" + cross_listed = "cross-listed" diff --git a/learning_resources/etl/prolearn.py b/learning_resources/etl/prolearn.py index 6189ff265a..1911fa9e03 100644 --- a/learning_resources/etl/prolearn.py +++ b/learning_resources/etl/prolearn.py @@ -11,7 +11,7 @@ from django.conf import settings from learning_resources.etl.constants import ETLSource -from learning_resources.etl.utils import generate_course_numbers_json, transform_topics +from learning_resources.etl.utils import transform_topics from learning_resources.models import LearningResourceOfferor, LearningResourcePlatform log = logging.getLogger(__name__) @@ -317,7 +317,7 @@ def _transform_course( "image": parse_image(course), "description": course["body"], "course": { - "course_numbers": generate_course_numbers_json(course["nid"]), + "course_numbers": [], }, "published": True, "topics": parse_topic(course), diff --git a/learning_resources/etl/prolearn_test.py b/learning_resources/etl/prolearn_test.py index ff0e7e8585..5c501fdcb9 100644 --- a/learning_resources/etl/prolearn_test.py +++ b/learning_resources/etl/prolearn_test.py @@ -7,7 +7,7 @@ import pytz from learning_resources.etl import prolearn -from learning_resources.etl.constants import CourseNumberType, ETLSource +from learning_resources.etl.constants import ETLSource from learning_resources.etl.prolearn import ( get_offered_by, parse_date, @@ -204,15 +204,7 @@ def test_prolearn_transform_courses(mock_mitpe_courses_data): course["start_value"], course["end_value"] ) ], - "course": { - "course_numbers": [ - { - "value": str(course["nid"]), - "department": None, - "listing_type": CourseNumberType.primary.value, - } - ] - }, + "course": {"course_numbers": []}, } for course in extracted_data ] diff --git a/learning_resources_search/api.py b/learning_resources_search/api.py index 2972513e8d..3c5c3f7cba 100644 --- a/learning_resources_search/api.py +++ b/learning_resources_search/api.py @@ -102,7 +102,7 @@ def generate_text_clause(text): }, { "nested": { - "path": "department_course_numbers", + "path": "course.course_numbers", "query": { query_type: {"query": text, "fields": COURSE_QUERY_FIELDS} }, diff --git a/learning_resources_search/api_test.py b/learning_resources_search/api_test.py index faf8d3a35e..e4c7e568bb 100644 --- a/learning_resources_search/api_test.py +++ b/learning_resources_search/api_test.py @@ -72,7 +72,7 @@ def test_generate_text_clause(): "offered_by", "department", "resource_content_tags", - "department_course_numbers", + "course", ], } }, @@ -97,13 +97,12 @@ def test_generate_text_clause(): }, { "nested": { - "path": "department_course_numbers", + "path": "course.course_numbers", "query": { "multi_match": { "query": "math", "fields": [ - "department_course_numbers.coursenum", - "department_course_numbers.department", + "course.course_numbers.value", ], } }, @@ -164,7 +163,7 @@ def test_generate_text_clause(): "offered_by", "department", "resource_content_tags", - "department_course_numbers", + "course", ], } }, @@ -183,13 +182,12 @@ def test_generate_text_clause(): }, { "nested": { - "path": "department_course_numbers", + "path": "course.course_numbers", "query": { "multi_match": { "query": "math", "fields": [ - "department_course_numbers.coursenum", - "department_course_numbers.department", + "course.course_numbers.value", ], } }, @@ -250,7 +248,7 @@ def test_generate_text_clause(): "offered_by", "department", "resource_content_tags", - "department_course_numbers", + "course", ], } }, @@ -275,13 +273,12 @@ def test_generate_text_clause(): }, { "nested": { - "path": "department_course_numbers", + "path": "course.course_numbers", "query": { "query_string": { "query": '"math"', "fields": [ - "department_course_numbers.coursenum", - "department_course_numbers.department", + "course.course_numbers.value", ], } }, @@ -342,7 +339,7 @@ def test_generate_text_clause(): "offered_by", "department", "resource_content_tags", - "department_course_numbers", + "course", ], } }, @@ -364,13 +361,12 @@ def test_generate_text_clause(): }, { "nested": { - "path": "department_course_numbers", + "path": "course.course_numbers", "query": { "query_string": { "query": '"math"', "fields": [ - "department_course_numbers.coursenum", - "department_course_numbers.department", + "course.course_numbers.value", ], } }, @@ -600,7 +596,7 @@ def test_execute_learn_search(opensearch): "offered_by", "department", "resource_content_tags", - "department_course_numbers", + "course", ], } }, @@ -627,13 +623,12 @@ def test_execute_learn_search(opensearch): }, { "nested": { - "path": "department_course_numbers", + "path": "course.course_numbers", "query": { "multi_match": { "query": "math", "fields": [ - "department_course_numbers.coursenum", - "department_course_numbers.department", + "course.course_numbers.value", ], } }, @@ -695,7 +690,7 @@ def test_execute_learn_search(opensearch): "offered_by", "department", "resource_content_tags", - "department_course_numbers", + "course", ], } }, @@ -720,13 +715,12 @@ def test_execute_learn_search(opensearch): }, { "nested": { - "path": "department_course_numbers", + "path": "course.course_numbers", "query": { "multi_match": { "query": "math", "fields": [ - "department_course_numbers.coursenum", - "department_course_numbers.department", + "course.course_numbers.value" ], } }, diff --git a/learning_resources_search/constants.py b/learning_resources_search/constants.py index a7086e0b6c..8a0dd24d55 100644 --- a/learning_resources_search/constants.py +++ b/learning_resources_search/constants.py @@ -61,13 +61,13 @@ class IndexestoUpdate(Enum): "platform", "professional", "id", + "course", ] SEARCH_NESTED_FILTERS = { "topic": "topics.name", "level": "runs.level", "department": "departments.name", - "department_course_numbers": "department_course_numbers.coursenum", } ENGLISH_TEXT_FIELD = { @@ -120,14 +120,23 @@ class IndexestoUpdate(Enum): }, "offered_by": {"type": "keyword"}, "resource_content_tags": {"type": "keyword"}, - "department_course_numbers": { - "type": "nested", + "course": { "properties": { - "coursenum": {"type": "keyword"}, - "sort_coursenum": {"type": "keyword"}, - "department": {"type": "keyword"}, - "primary": {"type": "boolean"}, - }, + "course_numbers": { + "type": "nested", + "properties": { + "value": {"type": "keyword"}, + "sort_coursenum": {"type": "keyword"}, + "department": { + "properties": { + "department_id": {"type": "keyword"}, + "name": {"type": "keyword"}, + } + }, + "primary": {"type": "boolean"}, + }, + } + } }, "runs": { "type": "nested", @@ -172,7 +181,7 @@ class IndexestoUpdate(Enum): "offered_by", "department", "resource_content_tags", - "department_course_numbers", + "course", ] AGGREGATIONS = [ @@ -190,8 +199,7 @@ class IndexestoUpdate(Enum): TOPICS_QUERY_FIELDS = ["topics.name"] COURSE_QUERY_FIELDS = [ - "department_course_numbers.coursenum", - "department_course_numbers.department", + "course.course_numbers.value", ] RUNS_QUERY_FIELDS = [ @@ -212,6 +220,3 @@ class IndexestoUpdate(Enum): } SEARCH_CONN_EXCEPTIONS = (ESConnectionError, UrlTimeoutError) -SOURCE_EXCLUSIONS = [ - "department_course_numbers", -] diff --git a/learning_resources_search/management/commands/update_index.py b/learning_resources_search/management/commands/update_index.py index fe0857d9f9..fce9d1751d 100644 --- a/learning_resources_search/management/commands/update_index.py +++ b/learning_resources_search/management/commands/update_index.py @@ -16,8 +16,7 @@ def add_arguments(self, parser): allowed_course_platforms = [ platform.value for platform in PlatformType - if platform.value - not in [PlatformType.youtube.value, PlatformType.podcast.value] + if platform.value not in [PlatformType.podcast.value] ] parser.add_argument( diff --git a/learning_resources_search/serializers.py b/learning_resources_search/serializers.py index 9f2e03c87f..35ef081d66 100644 --- a/learning_resources_search/serializers.py +++ b/learning_resources_search/serializers.py @@ -18,24 +18,22 @@ log = logging.getLogger() -def get_department_course_number_dict(coursenum_data): - """ - Generate course number dictionary from course number data - """ - department_data = coursenum_data.get("department", {}) - department_num = department_data.get("department_id") if department_data else None - course_num = coursenum_data.get("value") - if department_num and department_num[0].isdigit() and len(department_num) == 1: - sort_coursenum = f"0{course_num}" - else: - sort_coursenum = course_num - - return { - "coursenum": course_num, - "department": department_data.get("name") if department_data else None, - "primary": coursenum_data.get("listing_type") == CourseNumberType.primary.value, - "sort_coursenum": sort_coursenum, - } +def add_extra_course_number_fields(course_numbers: list[dict]): + """Add sortable coursenums and primary(boolean) fields to course.course_numbers""" + for coursenum_data in course_numbers: + department_data = coursenum_data.get("department", {}) + department_num = ( + department_data.get("department_id") if department_data else None + ) + course_num = coursenum_data.get("value") + if department_num and department_num[0].isdigit() and len(department_num) == 1: + sort_coursenum = f"0{course_num}" + else: + sort_coursenum = course_num + coursenum_data["primary"] = ( + coursenum_data.get("listing_type") == CourseNumberType.primary.value + ) + coursenum_data["sort_coursenum"] = sort_coursenum def extract_values(obj, key): @@ -341,14 +339,16 @@ class OSLearningResourceSerializer(LearningResourceSerializer): Any fields added here should also be added to SOURCE_EXCLUDED_FIELDS """ - SOURCE_EXCLUDED_FIELDS = ["department_course_numbers"] - - department_course_numbers = serializers.SerializerMethodField() + SOURCE_EXCLUDED_FIELDS = [ + "course.course_numbers.sort_coursenum", + "course.course_numbers.primary", + ] - def get_department_course_numbers(self, instance): + def to_representation(self, instance): + """ + Override to_representation to modify/add fields needed only for indexing + """ + data = super().to_representation(instance) if instance.resource_type == LearningResourceType.course.name: - return [ - get_department_course_number_dict(coursenum_data) - for coursenum_data in instance.course.course_numbers - ] - return [] + add_extra_course_number_fields(data["course"]["course_numbers"]) + return data diff --git a/learning_resources_search/serializers_test.py b/learning_resources_search/serializers_test.py index 503f60fb08..572d4132a5 100644 --- a/learning_resources_search/serializers_test.py +++ b/learning_resources_search/serializers_test.py @@ -66,25 +66,20 @@ def test_serialize_course_for_bulk( course_numbers=course_numbers ).learning_resource assert resource.course.course_numbers == course_numbers - assert serializers.serialize_course_for_bulk(resource) == { - "_id": resource.id, - "department_course_numbers": [ - { - "coursenum": readable_id, - "department": DEPARTMENTS[readable_id], - "primary": True, - "sort_coursenum": sort_course_num, - }, - { - "coursenum": extra_num, - "department": DEPARTMENTS[extra_num], - "primary": False, - "sort_coursenum": sorted_extra_num, - }, - ], - **LearningResourceSerializer(resource).data, + expected_data = {"_id": resource.id, **LearningResourceSerializer(resource).data} + expected_data["course"]["course_numbers"][0] = { + **expected_data["course"]["course_numbers"][0], + "primary": True, + "sort_coursenum": sort_course_num, + } + expected_data["course"]["course_numbers"][1] = { + **expected_data["course"]["course_numbers"][1], + "primary": False, + "sort_coursenum": sorted_extra_num, } + assert serializers.serialize_course_for_bulk(resource) == expected_data + @pytest.mark.django_db() def test_serialize_bulk_programs(mocker): @@ -112,7 +107,6 @@ def test_serialize_program_for_bulk(): program = factories.ProgramFactory.create() assert serializers.serialize_program_for_bulk(program.learning_resource) == { "_id": program.learning_resource.id, - "department_course_numbers": [], **LearningResourceSerializer(program.learning_resource).data, } From 6fffd903a4bd9deca2af0792668233a598e83889 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Thu, 2 Nov 2023 11:29:45 -0400 Subject: [PATCH 08/11] update migration function --- learning_resources/migrations/0023_course_course_numbers.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/learning_resources/migrations/0023_course_course_numbers.py b/learning_resources/migrations/0023_course_course_numbers.py index c60009d29e..a6bbf3fa6a 100644 --- a/learning_resources/migrations/0023_course_course_numbers.py +++ b/learning_resources/migrations/0023_course_course_numbers.py @@ -14,9 +14,7 @@ def set_course_numbers(apps, schema_editor): for course in Course.objects.select_related("learning_resource").iterator(): is_ocw = course.learning_resource.etl_source == ETLSource.ocw.name course.course_numbers = generate_course_numbers_json( - course.learning_resource.readable_id.split("+")[0] - if is_ocw - else course.learning_resource.readable_id, + course.learning_resource.readable_id, extra_nums=course.extra_course_numbers, is_ocw=is_ocw, ) From 4e0c345153dee522d38d279b6623011ee4661632 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Thu, 2 Nov 2023 11:53:54 -0400 Subject: [PATCH 09/11] update migration function back to what it was --- learning_resources/migrations/0023_course_course_numbers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/learning_resources/migrations/0023_course_course_numbers.py b/learning_resources/migrations/0023_course_course_numbers.py index a6bbf3fa6a..c60009d29e 100644 --- a/learning_resources/migrations/0023_course_course_numbers.py +++ b/learning_resources/migrations/0023_course_course_numbers.py @@ -14,7 +14,9 @@ def set_course_numbers(apps, schema_editor): for course in Course.objects.select_related("learning_resource").iterator(): is_ocw = course.learning_resource.etl_source == ETLSource.ocw.name course.course_numbers = generate_course_numbers_json( - course.learning_resource.readable_id, + course.learning_resource.readable_id.split("+")[0] + if is_ocw + else course.learning_resource.readable_id, extra_nums=course.extra_course_numbers, is_ocw=is_ocw, ) From 6f244466d58d240d5d76f74911d04ce3e48a33aa Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Thu, 2 Nov 2023 16:13:37 -0400 Subject: [PATCH 10/11] Adjust code to fit in better with contentfile PR --- learning_resources_search/api.py | 10 +-- learning_resources_search/api_test.py | 4 +- learning_resources_search/constants.py | 5 ++ learning_resources_search/serializers.py | 81 ++++++++++++------------ learning_resources_search/tasks.py | 6 +- learning_resources_search/tasks_test.py | 6 +- 6 files changed, 57 insertions(+), 55 deletions(-) diff --git a/learning_resources_search/api.py b/learning_resources_search/api.py index 3c5c3f7cba..816d480282 100644 --- a/learning_resources_search/api.py +++ b/learning_resources_search/api.py @@ -12,9 +12,9 @@ RUN_INSTRUCTORS_QUERY_FIELDS, RUNS_QUERY_FIELDS, SEARCH_NESTED_FILTERS, + SOURCE_EXCLUDED_FIELDS, TOPICS_QUERY_FIELDS, ) -from learning_resources_search.serializers import OSLearningResourceSerializer SIMILAR_RESOURCE_RELEVANT_FIELDS = ["title", "short_description"] LEARN_SUGGEST_FIELDS = ["title.trigram", "description.trigram"] @@ -336,13 +336,7 @@ def execute_learn_search(search_params): ) search = Search(index=",".join(indexes)) - search = search.source( - fields={ - "excludes": [ - *OSLearningResourceSerializer.SOURCE_EXCLUDED_FIELDS, - ] - } - ) + search = search.source(fields={"excludes": SOURCE_EXCLUDED_FIELDS}) if search_params.get("offset"): search = search.extra(from_=search_params.get("offset")) diff --git a/learning_resources_search/api_test.py b/learning_resources_search/api_test.py index e4c7e568bb..8d3739c3e6 100644 --- a/learning_resources_search/api_test.py +++ b/learning_resources_search/api_test.py @@ -12,7 +12,7 @@ generate_text_clause, relevant_indexes, ) -from learning_resources_search.serializers import OSLearningResourceSerializer +from learning_resources_search.constants import SOURCE_EXCLUDED_FIELDS @pytest.mark.parametrize( @@ -570,7 +570,7 @@ def test_execute_learn_search(opensearch): } query = { - "_source": {"excludes": OSLearningResourceSerializer.SOURCE_EXCLUDED_FIELDS}, + "_source": {"excludes": SOURCE_EXCLUDED_FIELDS}, "query": { "bool": { "should": [ diff --git a/learning_resources_search/constants.py b/learning_resources_search/constants.py index 8a0dd24d55..0a4205936a 100644 --- a/learning_resources_search/constants.py +++ b/learning_resources_search/constants.py @@ -220,3 +220,8 @@ class IndexestoUpdate(Enum): } SEARCH_CONN_EXCEPTIONS = (ESConnectionError, UrlTimeoutError) + +SOURCE_EXCLUDED_FIELDS = [ + "course.course_numbers.sort_coursenum", + "course.course_numbers.primary", +] diff --git a/learning_resources_search/serializers.py b/learning_resources_search/serializers.py index 35ef081d66..a59e3c4f00 100644 --- a/learning_resources_search/serializers.py +++ b/learning_resources_search/serializers.py @@ -18,22 +18,47 @@ log = logging.getLogger() -def add_extra_course_number_fields(course_numbers: list[dict]): +def add_extra_course_number_fields(resource_data: dict): """Add sortable coursenums and primary(boolean) fields to course.course_numbers""" - for coursenum_data in course_numbers: - department_data = coursenum_data.get("department", {}) - department_num = ( - department_data.get("department_id") if department_data else None - ) - course_num = coursenum_data.get("value") - if department_num and department_num[0].isdigit() and len(department_num) == 1: - sort_coursenum = f"0{course_num}" - else: - sort_coursenum = course_num - coursenum_data["primary"] = ( - coursenum_data.get("listing_type") == CourseNumberType.primary.value - ) - coursenum_data["sort_coursenum"] = sort_coursenum + if resource_data.get("course"): + course_numbers = resource_data["course"].get("course_numbers", []) + for coursenum_data in course_numbers: + department_data = coursenum_data.get("department", {}) + department_num = ( + department_data.get("department_id") if department_data else None + ) + course_num = coursenum_data.get("value") + if ( + department_num + and department_num[0].isdigit() + and len(department_num) == 1 + ): + sort_coursenum = f"0{course_num}" + else: + sort_coursenum = course_num + coursenum_data["primary"] = ( + coursenum_data.get("listing_type") == CourseNumberType.primary.value + ) + coursenum_data["sort_coursenum"] = sort_coursenum + + +def transform_resource_data(resource_data: dict): + """ + Apply transformations on the resource data + """ + add_extra_course_number_fields(resource_data) + return resource_data + + +def serialize_learning_resource_for_update( + learning_resource_obj: LearningResource, +) -> dict: + """Add any special search-related fields to the serializer data here""" + return { + **transform_resource_data( + LearningResourceSerializer(learning_resource_obj).data + ), + } def extract_values(obj, key): @@ -280,7 +305,7 @@ def serialize_course_for_bulk(learning_resource_obj): """ return { "_id": learning_resource_obj.id, - **OSLearningResourceSerializer(learning_resource_obj).data, + **serialize_learning_resource_for_update(learning_resource_obj), } @@ -315,7 +340,7 @@ def serialize_program_for_bulk(learning_resource_obj): """ return { "_id": learning_resource_obj.id, - **OSLearningResourceSerializer(learning_resource_obj).data, + **serialize_learning_resource_for_update(learning_resource_obj), } @@ -330,25 +355,3 @@ def serialize_for_deletion(opensearch_object_id): dict: the object deletion data """ return {"_id": opensearch_object_id, "_op_type": "delete"} - - -class OSLearningResourceSerializer(LearningResourceSerializer): - """ - Serializer for indexing learning resources. - Adds custom fields needed for search purposes. - Any fields added here should also be added to SOURCE_EXCLUDED_FIELDS - """ - - SOURCE_EXCLUDED_FIELDS = [ - "course.course_numbers.sort_coursenum", - "course.course_numbers.primary", - ] - - def to_representation(self, instance): - """ - Override to_representation to modify/add fields needed only for indexing - """ - data = super().to_representation(instance) - if instance.resource_type == LearningResourceType.course.name: - add_extra_course_number_fields(data["course"]["course_numbers"]) - return data diff --git a/learning_resources_search/tasks.py b/learning_resources_search/tasks.py index ac91ef3a47..1ec81ec790 100644 --- a/learning_resources_search/tasks.py +++ b/learning_resources_search/tasks.py @@ -21,7 +21,7 @@ IndexestoUpdate, ) from learning_resources_search.exceptions import ReindexError, RetryError -from learning_resources_search.serializers import OSLearningResourceSerializer +from learning_resources_search.serializers import serialize_learning_resource_for_update from open_discussions.celery import app from open_discussions.utils import chunks, merge_strings @@ -50,7 +50,7 @@ def deindex_document(doc_id, object_type, **kwargs): def upsert_course(course_id): """Upsert course based on stored database information""" course_obj = LearningResource.objects.get(id=course_id) - course_data = OSLearningResourceSerializer(course_obj).data + course_data = serialize_learning_resource_for_update(course_obj) api.upsert_document( course_id, course_data, @@ -64,7 +64,7 @@ def upsert_program(program_id): """Upsert program based on stored database information""" program_obj = Program.objects.get(learning_resource_id=program_id) - program_data = OSLearningResourceSerializer(program_obj.learning_resource).data + program_data = serialize_learning_resource_for_update(program_obj.learning_resource) api.upsert_document( program_obj.learning_resource.id, program_data, diff --git a/learning_resources_search/tasks_test.py b/learning_resources_search/tasks_test.py index e90dbff790..704776ce3e 100644 --- a/learning_resources_search/tasks_test.py +++ b/learning_resources_search/tasks_test.py @@ -20,7 +20,7 @@ IndexestoUpdate, ) from learning_resources_search.exceptions import ReindexError, RetryError -from learning_resources_search.serializers import OSLearningResourceSerializer +from learning_resources_search.serializers import serialize_learning_resource_for_update from learning_resources_search.tasks import ( deindex_document, finish_recreate_index, @@ -57,7 +57,7 @@ def test_upsert_course_task(mocked_api): """Test that upsert_course will serialize the course data and upsert it to the ES index""" course = CourseFactory.create() upsert_course(course.learning_resource_id) - data = OSLearningResourceSerializer(course.learning_resource).data + data = serialize_learning_resource_for_update(course.learning_resource) mocked_api.upsert_document.assert_called_once_with( course.learning_resource.id, data, @@ -70,7 +70,7 @@ def test_upsert_program_task(mocked_api): """Test that upsert_program will serialize the video data and upsert it to the ES index""" program = ProgramFactory.create() upsert_program(program) - data = OSLearningResourceSerializer(program.learning_resource).data + data = serialize_learning_resource_for_update(program.learning_resource) mocked_api.upsert_document.assert_called_once_with( program.learning_resource.id, data, From 39acf652159b8ff32a4dead19b8825ba3cad488a Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Thu, 2 Nov 2023 16:20:12 -0400 Subject: [PATCH 11/11] Update some docstrings --- learning_resources_search/serializers.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/learning_resources_search/serializers.py b/learning_resources_search/serializers.py index a59e3c4f00..e1235679cb 100644 --- a/learning_resources_search/serializers.py +++ b/learning_resources_search/serializers.py @@ -42,9 +42,16 @@ def add_extra_course_number_fields(resource_data: dict): coursenum_data["sort_coursenum"] = sort_coursenum -def transform_resource_data(resource_data: dict): +def transform_resource_data(resource_data: dict) -> dict: """ Apply transformations on the resource data + + Args: + resource_data(dict): The resource data + + Returns: + dict: The transformed resource data + """ add_extra_course_number_fields(resource_data) return resource_data @@ -53,7 +60,16 @@ def transform_resource_data(resource_data: dict): def serialize_learning_resource_for_update( learning_resource_obj: LearningResource, ) -> dict: - """Add any special search-related fields to the serializer data here""" + """ + Add any special search-related fields to the serializer data here + + Args: + learning_resource_obj(LearningResource): The learning resource object + + Returns: + dict: The serialized and transformed resource data + + """ return { **transform_resource_data( LearningResourceSerializer(learning_resource_obj).data